Merge branch 'master' into fix-kafka-again

This commit is contained in:
Ivan Lezhankin 2019-04-16 14:00:48 +03:00
commit dffe0eba40
378 changed files with 11940 additions and 2448 deletions

View File

@ -317,6 +317,7 @@ include (cmake/find_hdfs3.cmake) # uses protobuf
include (cmake/find_consistent-hashing.cmake)
include (cmake/find_base64.cmake)
include (cmake/find_hyperscan.cmake)
include (cmake/find_lfalloc.cmake)
find_contrib_lib(cityhash)
find_contrib_lib(farmhash)
find_contrib_lib(metrohash)

View File

@ -10,7 +10,3 @@ ClickHouse is an open-source column-oriented database management system that all
* [Blog](https://clickhouse.yandex/blog/en/) contains various ClickHouse-related articles, as well as announces and reports about events.
* [Contacts](https://clickhouse.yandex/#contacts) can help to get your questions answered if there are any.
* You can also [fill this form](https://forms.yandex.com/surveys/meet-yandex-clickhouse-team/) to meet Yandex ClickHouse team in person.
## Upcoming Events
* [ClickHouse Community Meetup](https://www.eventbrite.com/e/clickhouse-meetup-in-madrid-registration-55376746339) in Madrid on April 2.

9
cmake/find_lfalloc.cmake Normal file
View File

@ -0,0 +1,9 @@
if (NOT SANITIZE AND NOT ARCH_ARM AND NOT ARCH_32 AND NOT ARCH_PPC64LE)
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/lfalloc/src/lf_allocX64.h")
message (FATAL_ERROR "submodule contrib/lfalloc is missing. to fix try run: \n git submodule update --init --recursive")
endif()
set (USE_LFALLOC 1)
set (USE_LFALLOC_RANDOM_HINT 1)
set (LFALLOC_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/lfalloc/src)
message (STATUS "Using lfalloc=${USE_LFALLOC}: ${LFALLOC_INCLUDE_DIR}")
endif ()

View File

@ -36,6 +36,8 @@ elseif (NOT MISSING_INTERNAL_POCO_LIBRARY)
set (ENABLE_DATA_SQLITE 0 CACHE BOOL "")
set (ENABLE_DATA_MYSQL 0 CACHE BOOL "")
set (ENABLE_DATA_POSTGRESQL 0 CACHE BOOL "")
set (ENABLE_ENCODINGS 0 CACHE BOOL "")
# new after 2.0.0:
set (POCO_ENABLE_ZIP 0 CACHE BOOL "")
set (POCO_ENABLE_PAGECOMPILER 0 CACHE BOOL "")

View File

@ -284,6 +284,7 @@ endif ()
if (USE_INTERNAL_BROTLI_LIBRARY)
add_subdirectory(brotli-cmake)
target_compile_definitions(brotli PRIVATE BROTLI_BUILD_PORTABLE=1)
endif ()
if (USE_INTERNAL_PROTOBUF_LIBRARY)

2
contrib/hyperscan vendored

@ -1 +1 @@
Subproject commit 05dab0efee80be405aad5f74721b692b6889b75e
Subproject commit 05b0f9064cca4bd55548dedb0a32ed9461146c1e

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,23 @@
#pragma once
#include <string.h>
#include <stdlib.h>
#include "util/system/compiler.h"
namespace NMalloc {
volatile inline bool IsAllocatorCorrupted = false;
static inline void AbortFromCorruptedAllocator() {
IsAllocatorCorrupted = true;
abort();
}
struct TAllocHeader {
void* Block;
size_t AllocSize;
void Y_FORCE_INLINE Encode(void* block, size_t size, size_t signature) {
Block = block;
AllocSize = size | signature;
}
};
}

View File

@ -0,0 +1,33 @@
Style guide for the util folder is a stricter version of general style guide (mostly in terms of ambiguity resolution).
* all {} must be in K&R style
* &, * tied closer to a type, not to variable
* always use `using` not `typedef`
* even a single line block must be in braces {}:
```
if (A) {
B();
}
```
* _ at the end of private data member of a class - `First_`, `Second_`
* every .h file must be accompanied with corresponding .cpp to avoid a leakage and check that it is self contained
* prohibited to use `printf`-like functions
Things declared in the general style guide, which sometimes are missed:
* `template <`, not `template<`
* `noexcept`, not `throw ()` nor `throw()`, not required for destructors
* indents inside `namespace` same as inside `class`
Requirements for a new code (and for corrections in an old code which involves change of behaviour) in util:
* presence of UNIT-tests
* presence of comments in Doxygen style
* accessors without Get prefix (`Length()`, but not `GetLength()`)
This guide is not a mandatory as there is the general style guide.
Nevertheless if it is not followed, then a next `ya style .` run in the util folder will undeservedly update authors of some lines of code.
Thus before a commit it is recommended to run `ya style .` in the util folder.

View File

@ -0,0 +1,51 @@
#pragma once
#include "defaults.h"
using TAtomicBase = intptr_t;
using TAtomic = volatile TAtomicBase;
#if defined(__GNUC__)
#include "atomic_gcc.h"
#elif defined(_MSC_VER)
#include "atomic_win.h"
#else
#error unsupported platform
#endif
#if !defined(ATOMIC_COMPILER_BARRIER)
#define ATOMIC_COMPILER_BARRIER()
#endif
static inline TAtomicBase AtomicSub(TAtomic& a, TAtomicBase v) {
return AtomicAdd(a, -v);
}
static inline TAtomicBase AtomicGetAndSub(TAtomic& a, TAtomicBase v) {
return AtomicGetAndAdd(a, -v);
}
#if defined(USE_GENERIC_SETGET)
static inline TAtomicBase AtomicGet(const TAtomic& a) {
return a;
}
static inline void AtomicSet(TAtomic& a, TAtomicBase v) {
a = v;
}
#endif
static inline bool AtomicTryLock(TAtomic* a) {
return AtomicCas(a, 1, 0);
}
static inline bool AtomicTryAndTryLock(TAtomic* a) {
return (AtomicGet(*a) == 0) && AtomicTryLock(a);
}
static inline void AtomicUnlock(TAtomic* a) {
ATOMIC_COMPILER_BARRIER();
AtomicSet(*a, 0);
}
#include "atomic_ops.h"

View File

@ -0,0 +1,90 @@
#pragma once
#define ATOMIC_COMPILER_BARRIER() __asm__ __volatile__("" \
: \
: \
: "memory")
static inline TAtomicBase AtomicGet(const TAtomic& a) {
TAtomicBase tmp;
#if defined(_arm64_)
__asm__ __volatile__(
"ldar %x[value], %[ptr] \n\t"
: [value] "=r"(tmp)
: [ptr] "Q"(a)
: "memory");
#else
__atomic_load(&a, &tmp, __ATOMIC_ACQUIRE);
#endif
return tmp;
}
static inline void AtomicSet(TAtomic& a, TAtomicBase v) {
#if defined(_arm64_)
__asm__ __volatile__(
"stlr %x[value], %[ptr] \n\t"
: [ptr] "=Q"(a)
: [value] "r"(v)
: "memory");
#else
__atomic_store(&a, &v, __ATOMIC_RELEASE);
#endif
}
static inline intptr_t AtomicIncrement(TAtomic& p) {
return __atomic_add_fetch(&p, 1, __ATOMIC_SEQ_CST);
}
static inline intptr_t AtomicGetAndIncrement(TAtomic& p) {
return __atomic_fetch_add(&p, 1, __ATOMIC_SEQ_CST);
}
static inline intptr_t AtomicDecrement(TAtomic& p) {
return __atomic_sub_fetch(&p, 1, __ATOMIC_SEQ_CST);
}
static inline intptr_t AtomicGetAndDecrement(TAtomic& p) {
return __atomic_fetch_sub(&p, 1, __ATOMIC_SEQ_CST);
}
static inline intptr_t AtomicAdd(TAtomic& p, intptr_t v) {
return __atomic_add_fetch(&p, v, __ATOMIC_SEQ_CST);
}
static inline intptr_t AtomicGetAndAdd(TAtomic& p, intptr_t v) {
return __atomic_fetch_add(&p, v, __ATOMIC_SEQ_CST);
}
static inline intptr_t AtomicSwap(TAtomic* p, intptr_t v) {
(void)p; // disable strange 'parameter set but not used' warning on gcc
intptr_t ret;
__atomic_exchange(p, &v, &ret, __ATOMIC_SEQ_CST);
return ret;
}
static inline bool AtomicCas(TAtomic* a, intptr_t exchange, intptr_t compare) {
(void)a; // disable strange 'parameter set but not used' warning on gcc
return __atomic_compare_exchange(a, &compare, &exchange, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}
static inline intptr_t AtomicGetAndCas(TAtomic* a, intptr_t exchange, intptr_t compare) {
(void)a; // disable strange 'parameter set but not used' warning on gcc
__atomic_compare_exchange(a, &compare, &exchange, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
return compare;
}
static inline intptr_t AtomicOr(TAtomic& a, intptr_t b) {
return __atomic_or_fetch(&a, b, __ATOMIC_SEQ_CST);
}
static inline intptr_t AtomicXor(TAtomic& a, intptr_t b) {
return __atomic_xor_fetch(&a, b, __ATOMIC_SEQ_CST);
}
static inline intptr_t AtomicAnd(TAtomic& a, intptr_t b) {
return __atomic_and_fetch(&a, b, __ATOMIC_SEQ_CST);
}
static inline void AtomicBarrier() {
__sync_synchronize();
}

View File

@ -0,0 +1,189 @@
#pragma once
#include <type_traits>
template <typename T>
inline TAtomic* AsAtomicPtr(T volatile* target) {
return reinterpret_cast<TAtomic*>(target);
}
template <typename T>
inline const TAtomic* AsAtomicPtr(T const volatile* target) {
return reinterpret_cast<const TAtomic*>(target);
}
// integral types
template <typename T>
struct TAtomicTraits {
enum {
Castable = std::is_integral<T>::value && sizeof(T) == sizeof(TAtomicBase) && !std::is_const<T>::value,
};
};
template <typename T, typename TT>
using TEnableIfCastable = std::enable_if_t<TAtomicTraits<T>::Castable, TT>;
template <typename T>
inline TEnableIfCastable<T, T> AtomicGet(T const volatile& target) {
return static_cast<T>(AtomicGet(*AsAtomicPtr(&target)));
}
template <typename T>
inline TEnableIfCastable<T, void> AtomicSet(T volatile& target, TAtomicBase value) {
AtomicSet(*AsAtomicPtr(&target), value);
}
template <typename T>
inline TEnableIfCastable<T, T> AtomicIncrement(T volatile& target) {
return static_cast<T>(AtomicIncrement(*AsAtomicPtr(&target)));
}
template <typename T>
inline TEnableIfCastable<T, T> AtomicGetAndIncrement(T volatile& target) {
return static_cast<T>(AtomicGetAndIncrement(*AsAtomicPtr(&target)));
}
template <typename T>
inline TEnableIfCastable<T, T> AtomicDecrement(T volatile& target) {
return static_cast<T>(AtomicDecrement(*AsAtomicPtr(&target)));
}
template <typename T>
inline TEnableIfCastable<T, T> AtomicGetAndDecrement(T volatile& target) {
return static_cast<T>(AtomicGetAndDecrement(*AsAtomicPtr(&target)));
}
template <typename T>
inline TEnableIfCastable<T, T> AtomicAdd(T volatile& target, TAtomicBase value) {
return static_cast<T>(AtomicAdd(*AsAtomicPtr(&target), value));
}
template <typename T>
inline TEnableIfCastable<T, T> AtomicGetAndAdd(T volatile& target, TAtomicBase value) {
return static_cast<T>(AtomicGetAndAdd(*AsAtomicPtr(&target), value));
}
template <typename T>
inline TEnableIfCastable<T, T> AtomicSub(T volatile& target, TAtomicBase value) {
return static_cast<T>(AtomicSub(*AsAtomicPtr(&target), value));
}
template <typename T>
inline TEnableIfCastable<T, T> AtomicGetAndSub(T volatile& target, TAtomicBase value) {
return static_cast<T>(AtomicGetAndSub(*AsAtomicPtr(&target), value));
}
template <typename T>
inline TEnableIfCastable<T, T> AtomicSwap(T volatile* target, TAtomicBase exchange) {
return static_cast<T>(AtomicSwap(AsAtomicPtr(target), exchange));
}
template <typename T>
inline TEnableIfCastable<T, bool> AtomicCas(T volatile* target, TAtomicBase exchange, TAtomicBase compare) {
return AtomicCas(AsAtomicPtr(target), exchange, compare);
}
template <typename T>
inline TEnableIfCastable<T, T> AtomicGetAndCas(T volatile* target, TAtomicBase exchange, TAtomicBase compare) {
return static_cast<T>(AtomicGetAndCas(AsAtomicPtr(target), exchange, compare));
}
template <typename T>
inline TEnableIfCastable<T, bool> AtomicTryLock(T volatile* target) {
return AtomicTryLock(AsAtomicPtr(target));
}
template <typename T>
inline TEnableIfCastable<T, bool> AtomicTryAndTryLock(T volatile* target) {
return AtomicTryAndTryLock(AsAtomicPtr(target));
}
template <typename T>
inline TEnableIfCastable<T, void> AtomicUnlock(T volatile* target) {
AtomicUnlock(AsAtomicPtr(target));
}
template <typename T>
inline TEnableIfCastable<T, T> AtomicOr(T volatile& target, TAtomicBase value) {
return static_cast<T>(AtomicOr(*AsAtomicPtr(&target), value));
}
template <typename T>
inline TEnableIfCastable<T, T> AtomicAnd(T volatile& target, TAtomicBase value) {
return static_cast<T>(AtomicAnd(*AsAtomicPtr(&target), value));
}
template <typename T>
inline TEnableIfCastable<T, T> AtomicXor(T volatile& target, TAtomicBase value) {
return static_cast<T>(AtomicXor(*AsAtomicPtr(&target), value));
}
// pointer types
template <typename T>
inline T* AtomicGet(T* const volatile& target) {
return reinterpret_cast<T*>(AtomicGet(*AsAtomicPtr(&target)));
}
template <typename T>
inline void AtomicSet(T* volatile& target, T* value) {
AtomicSet(*AsAtomicPtr(&target), reinterpret_cast<TAtomicBase>(value));
}
using TNullPtr = decltype(nullptr);
template <typename T>
inline void AtomicSet(T* volatile& target, TNullPtr) {
AtomicSet(*AsAtomicPtr(&target), 0);
}
template <typename T>
inline T* AtomicSwap(T* volatile* target, T* exchange) {
return reinterpret_cast<T*>(AtomicSwap(AsAtomicPtr(target), reinterpret_cast<TAtomicBase>(exchange)));
}
template <typename T>
inline T* AtomicSwap(T* volatile* target, TNullPtr) {
return reinterpret_cast<T*>(AtomicSwap(AsAtomicPtr(target), 0));
}
template <typename T>
inline bool AtomicCas(T* volatile* target, T* exchange, T* compare) {
return AtomicCas(AsAtomicPtr(target), reinterpret_cast<TAtomicBase>(exchange), reinterpret_cast<TAtomicBase>(compare));
}
template <typename T>
inline T* AtomicGetAndCas(T* volatile* target, T* exchange, T* compare) {
return reinterpret_cast<T*>(AtomicGetAndCas(AsAtomicPtr(target), reinterpret_cast<TAtomicBase>(exchange), reinterpret_cast<TAtomicBase>(compare)));
}
template <typename T>
inline bool AtomicCas(T* volatile* target, T* exchange, TNullPtr) {
return AtomicCas(AsAtomicPtr(target), reinterpret_cast<TAtomicBase>(exchange), 0);
}
template <typename T>
inline T* AtomicGetAndCas(T* volatile* target, T* exchange, TNullPtr) {
return reinterpret_cast<T*>(AtomicGetAndCas(AsAtomicPtr(target), reinterpret_cast<TAtomicBase>(exchange), 0));
}
template <typename T>
inline bool AtomicCas(T* volatile* target, TNullPtr, T* compare) {
return AtomicCas(AsAtomicPtr(target), 0, reinterpret_cast<TAtomicBase>(compare));
}
template <typename T>
inline T* AtomicGetAndCas(T* volatile* target, TNullPtr, T* compare) {
return reinterpret_cast<T*>(AtomicGetAndCas(AsAtomicPtr(target), 0, reinterpret_cast<TAtomicBase>(compare)));
}
template <typename T>
inline bool AtomicCas(T* volatile* target, TNullPtr, TNullPtr) {
return AtomicCas(AsAtomicPtr(target), 0, 0);
}
template <typename T>
inline T* AtomicGetAndCas(T* volatile* target, TNullPtr, TNullPtr) {
return reinterpret_cast<T*>(AtomicGetAndCas(AsAtomicPtr(target), 0, 0));
}

View File

@ -0,0 +1,114 @@
#pragma once
#include <intrin.h>
#define USE_GENERIC_SETGET
#if defined(_i386_)
#pragma intrinsic(_InterlockedIncrement)
#pragma intrinsic(_InterlockedDecrement)
#pragma intrinsic(_InterlockedExchangeAdd)
#pragma intrinsic(_InterlockedExchange)
#pragma intrinsic(_InterlockedCompareExchange)
static inline intptr_t AtomicIncrement(TAtomic& a) {
return _InterlockedIncrement((volatile long*)&a);
}
static inline intptr_t AtomicGetAndIncrement(TAtomic& a) {
return _InterlockedIncrement((volatile long*)&a) - 1;
}
static inline intptr_t AtomicDecrement(TAtomic& a) {
return _InterlockedDecrement((volatile long*)&a);
}
static inline intptr_t AtomicGetAndDecrement(TAtomic& a) {
return _InterlockedDecrement((volatile long*)&a) + 1;
}
static inline intptr_t AtomicAdd(TAtomic& a, intptr_t b) {
return _InterlockedExchangeAdd((volatile long*)&a, b) + b;
}
static inline intptr_t AtomicGetAndAdd(TAtomic& a, intptr_t b) {
return _InterlockedExchangeAdd((volatile long*)&a, b);
}
static inline intptr_t AtomicSwap(TAtomic* a, intptr_t b) {
return _InterlockedExchange((volatile long*)a, b);
}
static inline bool AtomicCas(TAtomic* a, intptr_t exchange, intptr_t compare) {
return _InterlockedCompareExchange((volatile long*)a, exchange, compare) == compare;
}
static inline intptr_t AtomicGetAndCas(TAtomic* a, intptr_t exchange, intptr_t compare) {
return _InterlockedCompareExchange((volatile long*)a, exchange, compare);
}
#else // _x86_64_
#pragma intrinsic(_InterlockedIncrement64)
#pragma intrinsic(_InterlockedDecrement64)
#pragma intrinsic(_InterlockedExchangeAdd64)
#pragma intrinsic(_InterlockedExchange64)
#pragma intrinsic(_InterlockedCompareExchange64)
static inline intptr_t AtomicIncrement(TAtomic& a) {
return _InterlockedIncrement64((volatile __int64*)&a);
}
static inline intptr_t AtomicGetAndIncrement(TAtomic& a) {
return _InterlockedIncrement64((volatile __int64*)&a) - 1;
}
static inline intptr_t AtomicDecrement(TAtomic& a) {
return _InterlockedDecrement64((volatile __int64*)&a);
}
static inline intptr_t AtomicGetAndDecrement(TAtomic& a) {
return _InterlockedDecrement64((volatile __int64*)&a) + 1;
}
static inline intptr_t AtomicAdd(TAtomic& a, intptr_t b) {
return _InterlockedExchangeAdd64((volatile __int64*)&a, b) + b;
}
static inline intptr_t AtomicGetAndAdd(TAtomic& a, intptr_t b) {
return _InterlockedExchangeAdd64((volatile __int64*)&a, b);
}
static inline intptr_t AtomicSwap(TAtomic* a, intptr_t b) {
return _InterlockedExchange64((volatile __int64*)a, b);
}
static inline bool AtomicCas(TAtomic* a, intptr_t exchange, intptr_t compare) {
return _InterlockedCompareExchange64((volatile __int64*)a, exchange, compare) == compare;
}
static inline intptr_t AtomicGetAndCas(TAtomic* a, intptr_t exchange, intptr_t compare) {
return _InterlockedCompareExchange64((volatile __int64*)a, exchange, compare);
}
static inline intptr_t AtomicOr(TAtomic& a, intptr_t b) {
return _InterlockedOr64(&a, b) | b;
}
static inline intptr_t AtomicAnd(TAtomic& a, intptr_t b) {
return _InterlockedAnd64(&a, b) & b;
}
static inline intptr_t AtomicXor(TAtomic& a, intptr_t b) {
return _InterlockedXor64(&a, b) ^ b;
}
#endif // _x86_
//TODO
static inline void AtomicBarrier() {
TAtomic val = 0;
AtomicSwap(&val, 0);
}

View File

@ -0,0 +1,617 @@
#pragma once
// useful cross-platfrom definitions for compilers
/**
* @def Y_FUNC_SIGNATURE
*
* Use this macro to get pretty function name (see example).
*
* @code
* void Hi() {
* Cout << Y_FUNC_SIGNATURE << Endl;
* }
* template <typename T>
* void Do() {
* Cout << Y_FUNC_SIGNATURE << Endl;
* }
* int main() {
* Hi(); // void Hi()
* Do<int>(); // void Do() [T = int]
* Do<TString>(); // void Do() [T = TString]
* }
* @endcode
*/
#if defined(__GNUC__)
#define Y_FUNC_SIGNATURE __PRETTY_FUNCTION__
#elif defined(_MSC_VER)
#define Y_FUNC_SIGNATURE __FUNCSIG__
#else
#define Y_FUNC_SIGNATURE ""
#endif
#ifdef __GNUC__
#define Y_PRINTF_FORMAT(n, m) __attribute__((__format__(__printf__, n, m)))
#endif
#ifndef Y_PRINTF_FORMAT
#define Y_PRINTF_FORMAT(n, m)
#endif
#if defined(__clang__)
#define Y_NO_SANITIZE(...) __attribute__((no_sanitize(__VA_ARGS__)))
#endif
#if !defined(Y_NO_SANITIZE)
#define Y_NO_SANITIZE(...)
#endif
/**
* @def Y_DECLARE_UNUSED
*
* Macro is needed to silence compiler warning about unused entities (e.g. function or argument).
*
* @code
* Y_DECLARE_UNUSED int FunctionUsedSolelyForDebugPurposes();
* assert(FunctionUsedSolelyForDebugPurposes() == 42);
*
* void Foo(const int argumentUsedOnlyForDebugPurposes Y_DECLARE_UNUSED) {
* assert(argumentUsedOnlyForDebugPurposes == 42);
* // however you may as well omit `Y_DECLARE_UNUSED` and use `UNUSED` macro instead
* Y_UNUSED(argumentUsedOnlyForDebugPurposes);
* }
* @endcode
*/
#ifdef __GNUC__
#define Y_DECLARE_UNUSED __attribute__((unused))
#endif
#ifndef Y_DECLARE_UNUSED
#define Y_DECLARE_UNUSED
#endif
#if defined(__GNUC__)
#define Y_LIKELY(Cond) __builtin_expect(!!(Cond), 1)
#define Y_UNLIKELY(Cond) __builtin_expect(!!(Cond), 0)
#define Y_PREFETCH_READ(Pointer, Priority) __builtin_prefetch((const void*)(Pointer), 0, Priority)
#define Y_PREFETCH_WRITE(Pointer, Priority) __builtin_prefetch((const void*)(Pointer), 1, Priority)
#endif
/**
* @def Y_FORCE_INLINE
*
* Macro to use in place of 'inline' in function declaration/definition to force
* it to be inlined.
*/
#if !defined(Y_FORCE_INLINE)
#if defined(CLANG_COVERAGE)
#/* excessive __always_inline__ might significantly slow down compilation of an instrumented unit */
#define Y_FORCE_INLINE inline
#elif defined(_MSC_VER)
#define Y_FORCE_INLINE __forceinline
#elif defined(__GNUC__)
#/* Clang also defines __GNUC__ (as 4) */
#define Y_FORCE_INLINE inline __attribute__((__always_inline__))
#else
#define Y_FORCE_INLINE inline
#endif
#endif
/**
* @def Y_NO_INLINE
*
* Macro to use in place of 'inline' in function declaration/definition to
* prevent it from being inlined.
*/
#if !defined(Y_NO_INLINE)
#if defined(_MSC_VER)
#define Y_NO_INLINE __declspec(noinline)
#elif defined(__GNUC__) || defined(__INTEL_COMPILER)
#/* Clang also defines __GNUC__ (as 4) */
#define Y_NO_INLINE __attribute__((__noinline__))
#else
#define Y_NO_INLINE
#endif
#endif
//to cheat compiler about strict aliasing or similar problems
#if defined(__GNUC__)
#define Y_FAKE_READ(X) \
do { \
__asm__ __volatile__("" \
: \
: "m"(X)); \
} while (0)
#define Y_FAKE_WRITE(X) \
do { \
__asm__ __volatile__("" \
: "=m"(X)); \
} while (0)
#endif
#if !defined(Y_FAKE_READ)
#define Y_FAKE_READ(X)
#endif
#if !defined(Y_FAKE_WRITE)
#define Y_FAKE_WRITE(X)
#endif
#ifndef Y_PREFETCH_READ
#define Y_PREFETCH_READ(Pointer, Priority) (void)(const void*)(Pointer), (void)Priority
#endif
#ifndef Y_PREFETCH_WRITE
#define Y_PREFETCH_WRITE(Pointer, Priority) (void)(const void*)(Pointer), (void)Priority
#endif
#ifndef Y_LIKELY
#define Y_LIKELY(Cond) (Cond)
#define Y_UNLIKELY(Cond) (Cond)
#endif
#ifdef __GNUC__
#define _packed __attribute__((packed))
#else
#define _packed
#endif
#if defined(__GNUC__)
#define Y_WARN_UNUSED_RESULT __attribute__((warn_unused_result))
#endif
#ifndef Y_WARN_UNUSED_RESULT
#define Y_WARN_UNUSED_RESULT
#endif
#if defined(__GNUC__)
#define Y_HIDDEN __attribute__((visibility("hidden")))
#endif
#if !defined(Y_HIDDEN)
#define Y_HIDDEN
#endif
#if defined(__GNUC__)
#define Y_PUBLIC __attribute__((visibility("default")))
#endif
#if !defined(Y_PUBLIC)
#define Y_PUBLIC
#endif
#if !defined(Y_UNUSED) && !defined(__cplusplus)
#define Y_UNUSED(var) (void)(var)
#endif
#if !defined(Y_UNUSED) && defined(__cplusplus)
template <class... Types>
constexpr Y_FORCE_INLINE int Y_UNUSED(Types&&...) {
return 0;
};
#endif
/**
* @def Y_ASSUME
*
* Macro that tells the compiler that it can generate optimized code
* as if the given expression will always evaluate true.
* The behavior is undefined if it ever evaluates false.
*
* @code
* // factored into a function so that it's testable
* inline int Avg(int x, int y) {
* if (x >= 0 && y >= 0) {
* return (static_cast<unsigned>(x) + static_cast<unsigned>(y)) >> 1;
* } else {
* // a slower implementation
* }
* }
*
* // we know that xs and ys are non-negative from domain knowledge,
* // but we can't change the types of xs and ys because of API constrains
* int Foo(const TVector<int>& xs, const TVector<int>& ys) {
* TVector<int> avgs;
* avgs.resize(xs.size());
* for (size_t i = 0; i < xs.size(); ++i) {
* auto x = xs[i];
* auto y = ys[i];
* Y_ASSUME(x >= 0);
* Y_ASSUME(y >= 0);
* xs[i] = Avg(x, y);
* }
* }
* @endcode
*/
#if defined(__GNUC__)
#define Y_ASSUME(condition) ((condition) ? (void)0 : __builtin_unreachable())
#elif defined(_MSC_VER)
#define Y_ASSUME(condition) __assume(condition)
#else
#define Y_ASSUME(condition) Y_UNUSED(condition)
#endif
#ifdef __cplusplus
[[noreturn]]
#endif
Y_HIDDEN void _YandexAbort();
/**
* @def Y_UNREACHABLE
*
* Macro that marks the rest of the code branch unreachable.
* The behavior is undefined if it's ever reached.
*
* @code
* switch (i % 3) {
* case 0:
* return foo;
* case 1:
* return bar;
* case 2:
* return baz;
* default:
* Y_UNREACHABLE();
* }
* @endcode
*/
#if defined(__GNUC__) || defined(_MSC_VER)
#define Y_UNREACHABLE() Y_ASSUME(0)
#else
#define Y_UNREACHABLE() _YandexAbort()
#endif
#if defined(undefined_sanitizer_enabled)
#define _ubsan_enabled_
#endif
#ifdef __clang__
#if __has_feature(thread_sanitizer)
#define _tsan_enabled_
#endif
#if __has_feature(memory_sanitizer)
#define _msan_enabled_
#endif
#if __has_feature(address_sanitizer)
#define _asan_enabled_
#endif
#else
#if defined(thread_sanitizer_enabled) || defined(__SANITIZE_THREAD__)
#define _tsan_enabled_
#endif
#if defined(memory_sanitizer_enabled)
#define _msan_enabled_
#endif
#if defined(address_sanitizer_enabled) || defined(__SANITIZE_ADDRESS__)
#define _asan_enabled_
#endif
#endif
#if defined(_asan_enabled_) || defined(_msan_enabled_) || defined(_tsan_enabled_) || defined(_ubsan_enabled_)
#define _san_enabled_
#endif
#if defined(_MSC_VER)
#define __PRETTY_FUNCTION__ __FUNCSIG__
#endif
#if defined(__GNUC__)
#define Y_WEAK __attribute__((weak))
#else
#define Y_WEAK
#endif
#if defined(__CUDACC_VER_MAJOR__)
#define Y_CUDA_AT_LEAST(x, y) (__CUDACC_VER_MAJOR__ > x || (__CUDACC_VER_MAJOR__ == x && __CUDACC_VER_MINOR__ >= y))
#else
#define Y_CUDA_AT_LEAST(x, y) 0
#endif
// NVidia CUDA C++ Compiler did not know about noexcept keyword until version 9.0
#if !Y_CUDA_AT_LEAST(9, 0)
#if defined(__CUDACC__) && !defined(noexcept)
#define noexcept throw ()
#endif
#endif
#if defined(__GNUC__)
#define Y_COLD __attribute__((cold))
#define Y_LEAF __attribute__((leaf))
#define Y_WRAPPER __attribute__((artificial))
#else
#define Y_COLD
#define Y_LEAF
#define Y_WRAPPER
#endif
/**
* @def Y_PRAGMA
*
* Macro for use in other macros to define compiler pragma
* See below for other usage examples
*
* @code
* #if defined(__clang__) || defined(__GNUC__)
* #define Y_PRAGMA_NO_WSHADOW \
* Y_PRAGMA("GCC diagnostic ignored \"-Wshadow\"")
* #elif defined(_MSC_VER)
* #define Y_PRAGMA_NO_WSHADOW \
* Y_PRAGMA("warning(disable:4456 4457")
* #else
* #define Y_PRAGMA_NO_WSHADOW
* #endif
* @endcode
*/
#if defined(__clang__) || defined(__GNUC__)
#define Y_PRAGMA(x) _Pragma(x)
#elif defined(_MSC_VER)
#define Y_PRAGMA(x) __pragma(x)
#else
#define Y_PRAGMA(x)
#endif
/**
* @def Y_PRAGMA_DIAGNOSTIC_PUSH
*
* Cross-compiler pragma to save diagnostic settings
*
* @see
* GCC: https://gcc.gnu.org/onlinedocs/gcc/Diagnostic-Pragmas.html
* MSVC: https://msdn.microsoft.com/en-us/library/2c8f766e.aspx
* Clang: https://clang.llvm.org/docs/UsersManual.html#controlling-diagnostics-via-pragmas
*
* @code
* Y_PRAGMA_DIAGNOSTIC_PUSH
* @endcode
*/
#if defined(__clang__) || defined(__GNUC__)
#define Y_PRAGMA_DIAGNOSTIC_PUSH \
Y_PRAGMA("GCC diagnostic push")
#elif defined(_MSC_VER)
#define Y_PRAGMA_DIAGNOSTIC_PUSH \
Y_PRAGMA(warning(push))
#else
#define Y_PRAGMA_DIAGNOSTIC_PUSH
#endif
/**
* @def Y_PRAGMA_DIAGNOSTIC_POP
*
* Cross-compiler pragma to restore diagnostic settings
*
* @see
* GCC: https://gcc.gnu.org/onlinedocs/gcc/Diagnostic-Pragmas.html
* MSVC: https://msdn.microsoft.com/en-us/library/2c8f766e.aspx
* Clang: https://clang.llvm.org/docs/UsersManual.html#controlling-diagnostics-via-pragmas
*
* @code
* Y_PRAGMA_DIAGNOSTIC_POP
* @endcode
*/
#if defined(__clang__) || defined(__GNUC__)
#define Y_PRAGMA_DIAGNOSTIC_POP \
Y_PRAGMA("GCC diagnostic pop")
#elif defined(_MSC_VER)
#define Y_PRAGMA_DIAGNOSTIC_POP \
Y_PRAGMA(warning(pop))
#else
#define Y_PRAGMA_DIAGNOSTIC_POP
#endif
/**
* @def Y_PRAGMA_NO_WSHADOW
*
* Cross-compiler pragma to disable warnings about shadowing variables
*
* @code
* Y_PRAGMA_DIAGNOSTIC_PUSH
* Y_PRAGMA_NO_WSHADOW
*
* // some code which use variable shadowing, e.g.:
*
* for (int i = 0; i < 100; ++i) {
* Use(i);
*
* for (int i = 42; i < 100500; ++i) { // this i is shadowing previous i
* AnotherUse(i);
* }
* }
*
* Y_PRAGMA_DIAGNOSTIC_POP
* @endcode
*/
#if defined(__clang__) || defined(__GNUC__)
#define Y_PRAGMA_NO_WSHADOW \
Y_PRAGMA("GCC diagnostic ignored \"-Wshadow\"")
#elif defined(_MSC_VER)
#define Y_PRAGMA_NO_WSHADOW \
Y_PRAGMA(warning(disable : 4456 4457))
#else
#define Y_PRAGMA_NO_WSHADOW
#endif
/**
* @ def Y_PRAGMA_NO_UNUSED_FUNCTION
*
* Cross-compiler pragma to disable warnings about unused functions
*
* @see
* GCC: https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html
* Clang: https://clang.llvm.org/docs/DiagnosticsReference.html#wunused-function
* MSVC: there is no such warning
*
* @code
* Y_PRAGMA_DIAGNOSTIC_PUSH
* Y_PRAGMA_NO_UNUSED_FUNCTION
*
* // some code which introduces a function which later will not be used, e.g.:
*
* void Foo() {
* }
*
* int main() {
* return 0; // Foo() never called
* }
*
* Y_PRAGMA_DIAGNOSTIC_POP
* @endcode
*/
#if defined(__clang__) || defined(__GNUC__)
#define Y_PRAGMA_NO_UNUSED_FUNCTION \
Y_PRAGMA("GCC diagnostic ignored \"-Wunused-function\"")
#else
#define Y_PRAGMA_NO_UNUSED_FUNCTION
#endif
/**
* @ def Y_PRAGMA_NO_UNUSED_PARAMETER
*
* Cross-compiler pragma to disable warnings about unused function parameters
*
* @see
* GCC: https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html
* Clang: https://clang.llvm.org/docs/DiagnosticsReference.html#wunused-parameter
* MSVC: https://msdn.microsoft.com/en-us/library/26kb9fy0.aspx
*
* @code
* Y_PRAGMA_DIAGNOSTIC_PUSH
* Y_PRAGMA_NO_UNUSED_PARAMETER
*
* // some code which introduces a function with unused parameter, e.g.:
*
* void foo(int a) {
* // a is not referenced
* }
*
* int main() {
* foo(1);
* return 0;
* }
*
* Y_PRAGMA_DIAGNOSTIC_POP
* @endcode
*/
#if defined(__clang__) || defined(__GNUC__)
#define Y_PRAGMA_NO_UNUSED_PARAMETER \
Y_PRAGMA("GCC diagnostic ignored \"-Wunused-parameter\"")
#elif defined(_MSC_VER)
#define Y_PRAGMA_NO_UNUSED_PARAMETER \
Y_PRAGMA(warning(disable : 4100))
#else
#define Y_PRAGMA_NO_UNUSED_PARAMETER
#endif
/**
* @def Y_PRAGMA_NO_DEPRECATED
*
* Cross compiler pragma to disable warnings and errors about deprecated
*
* @see
* GCC: https://gcc.gnu.org/onlinedocs/gcc/Warning-Options.html
* Clang: https://clang.llvm.org/docs/DiagnosticsReference.html#wdeprecated
* MSVC: https://docs.microsoft.com/en-us/cpp/error-messages/compiler-warnings/compiler-warning-level-3-c4996?view=vs-2017
*
* @code
* Y_PRAGMA_DIAGNOSTIC_PUSH
* Y_PRAGMA_NO_DEPRECATED
*
* [deprecated] void foo() {
* // ...
* }
*
* int main() {
* foo();
* return 0;
* }
*
* Y_PRAGMA_DIAGNOSTIC_POP
* @endcode
*/
#if defined(__clang__) || defined(__GNUC__)
#define Y_PRAGMA_NO_DEPRECATED \
Y_PRAGMA("GCC diagnostic ignored \"-Wdeprecated\"")
#elif defined(_MSC_VER)
#define Y_PRAGMA_NO_DEPRECATED \
Y_PRAGMA(warning(disable : 4996))
#else
#define Y_PRAGMA_NO_DEPRECATED
#endif
#if defined(__clang__) || defined(__GNUC__)
/**
* @def Y_CONST_FUNCTION
methods and functions, marked with this method are promised to:
1. do not have side effects
2. this method do not read global memory
NOTE: this attribute can't be set for methods that depend on data, pointed by this
this allow compilers to do hard optimization of that functions
NOTE: in common case this attribute can't be set if method have pointer-arguments
NOTE: as result there no any reason to discard result of such method
*/
#define Y_CONST_FUNCTION [[gnu::const]]
#endif
#if !defined(Y_CONST_FUNCTION)
#define Y_CONST_FUNCTION
#endif
#if defined(__clang__) || defined(__GNUC__)
/**
* @def Y_PURE_FUNCTION
methods and functions, marked with this method are promised to:
1. do not have side effects
2. result will be the same if no global memory changed
this allow compilers to do hard optimization of that functions
NOTE: as result there no any reason to discard result of such method
*/
#define Y_PURE_FUNCTION [[gnu::pure]]
#endif
#if !defined(Y_PURE_FUNCTION)
#define Y_PURE_FUNCTION
#endif
/**
* @ def Y_HAVE_INT128
*
* Defined when the compiler supports __int128 extension
*
* @code
*
* #if defined(Y_HAVE_INT128)
* __int128 myVeryBigInt = 12345678901234567890;
* #endif
*
* @endcode
*/
#if defined(__SIZEOF_INT128__)
#define Y_HAVE_INT128 1
#endif
/**
* XRAY macro must be passed to compiler if XRay is enabled.
*
* Define everything XRay-specific as a macro so that it doesn't cause errors
* for compilers that doesn't support XRay.
*/
#if defined(XRAY) && defined(__cplusplus)
#include <xray/xray_interface.h>
#define Y_XRAY_ALWAYS_INSTRUMENT [[clang::xray_always_instrument]]
#define Y_XRAY_NEVER_INSTRUMENT [[clang::xray_never_instrument]]
#define Y_XRAY_CUSTOM_EVENT(__string, __length) \
do { \
__xray_customevent(__string, __length); \
} while (0)
#else
#define Y_XRAY_ALWAYS_INSTRUMENT
#define Y_XRAY_NEVER_INSTRUMENT
#define Y_XRAY_CUSTOM_EVENT(__string, __length) \
do { \
} while (0)
#endif

View File

@ -0,0 +1,168 @@
#pragma once
#include "platform.h"
#if defined _unix_
#define LOCSLASH_C '/'
#define LOCSLASH_S "/"
#else
#define LOCSLASH_C '\\'
#define LOCSLASH_S "\\"
#endif // _unix_
#if defined(__INTEL_COMPILER) && defined(__cplusplus)
#include <new>
#endif
// low and high parts of integers
#if !defined(_win_)
#include <sys/param.h>
#endif
#if defined(BSD) || defined(_android_)
#if defined(BSD)
#include <machine/endian.h>
#endif
#if defined(_android_)
#include <endian.h>
#endif
#if (BYTE_ORDER == LITTLE_ENDIAN)
#define _little_endian_
#elif (BYTE_ORDER == BIG_ENDIAN)
#define _big_endian_
#else
#error unknown endian not supported
#endif
#elif (defined(_sun_) && !defined(__i386__)) || defined(_hpux_) || defined(WHATEVER_THAT_HAS_BIG_ENDIAN)
#define _big_endian_
#else
#define _little_endian_
#endif
// alignment
#if (defined(_sun_) && !defined(__i386__)) || defined(_hpux_) || defined(__alpha__) || defined(__ia64__) || defined(WHATEVER_THAT_NEEDS_ALIGNING_QUADS)
#define _must_align8_
#endif
#if (defined(_sun_) && !defined(__i386__)) || defined(_hpux_) || defined(__alpha__) || defined(__ia64__) || defined(WHATEVER_THAT_NEEDS_ALIGNING_LONGS)
#define _must_align4_
#endif
#if (defined(_sun_) && !defined(__i386__)) || defined(_hpux_) || defined(__alpha__) || defined(__ia64__) || defined(WHATEVER_THAT_NEEDS_ALIGNING_SHORTS)
#define _must_align2_
#endif
#if defined(__GNUC__)
#define alias_hack __attribute__((__may_alias__))
#endif
#ifndef alias_hack
#define alias_hack
#endif
#include "types.h"
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
#define PRAGMA(x) _Pragma(#x)
#define RCSID(idstr) PRAGMA(comment(exestr, idstr))
#else
#define RCSID(idstr) static const char rcsid[] = idstr
#endif
#include "compiler.h"
#ifdef _win_
#include <malloc.h>
#elif defined(_sun_)
#include <alloca.h>
#endif
#ifdef NDEBUG
#define Y_IF_DEBUG(X)
#else
#define Y_IF_DEBUG(X) X
#endif
/**
* @def Y_ARRAY_SIZE
*
* This macro is needed to get number of elements in a statically allocated fixed size array. The
* expression is a compile-time constant and therefore can be used in compile time computations.
*
* @code
* enum ENumbers {
* EN_ONE,
* EN_TWO,
* EN_SIZE
* }
*
* const char* NAMES[] = {
* "one",
* "two"
* }
*
* static_assert(Y_ARRAY_SIZE(NAMES) == EN_SIZE, "you should define `NAME` for each enumeration");
* @endcode
*
* This macro also catches type errors. If you see a compiler error like "warning: division by zero
* is undefined" when using `Y_ARRAY_SIZE` then you are probably giving it a pointer.
*
* Since all of our code is expected to work on a 64 bit platform where pointers are 8 bytes we may
* falsefully accept pointers to types of sizes that are divisors of 8 (1, 2, 4 and 8).
*/
#if defined(__cplusplus)
namespace NArraySizePrivate {
template <class T>
struct TArraySize;
template <class T, size_t N>
struct TArraySize<T[N]> {
enum {
Result = N
};
};
template <class T, size_t N>
struct TArraySize<T (&)[N]> {
enum {
Result = N
};
};
}
#define Y_ARRAY_SIZE(arr) ((size_t)::NArraySizePrivate::TArraySize<decltype(arr)>::Result)
#else
#undef Y_ARRAY_SIZE
#define Y_ARRAY_SIZE(arr) \
((sizeof(arr) / sizeof((arr)[0])) / static_cast<size_t>(!(sizeof(arr) % sizeof((arr)[0]))))
#endif
#undef Y_ARRAY_BEGIN
#define Y_ARRAY_BEGIN(arr) (arr)
#undef Y_ARRAY_END
#define Y_ARRAY_END(arr) ((arr) + Y_ARRAY_SIZE(arr))
/**
* Concatenates two symbols, even if one of them is itself a macro.
*/
#define Y_CAT(X, Y) Y_CAT_I(X, Y)
#define Y_CAT_I(X, Y) Y_CAT_II(X, Y)
#define Y_CAT_II(X, Y) X##Y
#define Y_STRINGIZE(X) UTIL_PRIVATE_STRINGIZE_AUX(X)
#define UTIL_PRIVATE_STRINGIZE_AUX(X) #X
#if defined(__COUNTER__)
#define Y_GENERATE_UNIQUE_ID(N) Y_CAT(N, __COUNTER__)
#endif
#if !defined(Y_GENERATE_UNIQUE_ID)
#define Y_GENERATE_UNIQUE_ID(N) Y_CAT(N, __LINE__)
#endif
#define NPOS ((size_t)-1)

View File

@ -0,0 +1,242 @@
#pragma once
// What OS ?
// our definition has the form _{osname}_
#if defined(_WIN64)
#define _win64_
#define _win32_
#elif defined(__WIN32__) || defined(_WIN32) // _WIN32 is also defined by the 64-bit compiler for backward compatibility
#define _win32_
#else
#define _unix_
#if defined(__sun__) || defined(sun) || defined(sparc) || defined(__sparc)
#define _sun_
#endif
#if defined(__hpux__)
#define _hpux_
#endif
#if defined(__linux__)
#define _linux_
#endif
#if defined(__FreeBSD__)
#define _freebsd_
#endif
#if defined(__CYGWIN__)
#define _cygwin_
#endif
#if defined(__APPLE__)
#define _darwin_
#endif
#if defined(__ANDROID__)
#define _android_
#endif
#endif
#if defined(__IOS__)
#define _ios_
#endif
#if defined(_linux_)
#if defined(_musl_)
//nothing to do
#elif defined(_android_)
#define _bionic_
#else
#define _glibc_
#endif
#endif
#if defined(_darwin_)
#define unix
#define __unix__
#endif
#if defined(_win32_) || defined(_win64_)
#define _win_
#endif
#if defined(__arm__) || defined(__ARM__) || defined(__ARM_NEON) || defined(__aarch64__) || defined(_M_ARM)
#if defined(__arm64) || defined(__arm64__) || defined(__aarch64__)
#define _arm64_
#else
#define _arm32_
#endif
#endif
#if defined(_arm64_) || defined(_arm32_)
#define _arm_
#endif
/* __ia64__ and __x86_64__ - defined by GNU C.
* _M_IA64, _M_X64, _M_AMD64 - defined by Visual Studio.
*
* Microsoft can define _M_IX86, _M_AMD64 (before Visual Studio 8)
* or _M_X64 (starting in Visual Studio 8).
*/
#if defined(__x86_64__) || defined(_M_X64) || defined(_M_AMD64)
#define _x86_64_
#endif
#if defined(__i386__) || defined(_M_IX86)
#define _i386_
#endif
#if defined(__ia64__) || defined(_M_IA64)
#define _ia64_
#endif
#if defined(__powerpc__)
#define _ppc_
#endif
#if defined(__powerpc64__)
#define _ppc64_
#endif
#if !defined(sparc) && !defined(__sparc) && !defined(__hpux__) && !defined(__alpha__) && !defined(_ia64_) && !defined(_x86_64_) && !defined(_arm_) && !defined(_i386_) && !defined(_ppc_) && !defined(_ppc64_)
#error "platform not defined, please, define one"
#endif
#if defined(_x86_64_) || defined(_i386_)
#define _x86_
#endif
#if defined(__MIC__)
#define _mic_
#define _k1om_
#endif
// stdio or MessageBox
#if defined(__CONSOLE__) || defined(_CONSOLE)
#define _console_
#endif
#if (defined(_win_) && !defined(_console_))
#define _windows_
#elif !defined(_console_)
#define _console_
#endif
#if defined(__SSE__) || defined(SSE_ENABLED)
#define _sse_
#endif
#if defined(__SSE2__) || defined(SSE2_ENABLED)
#define _sse2_
#endif
#if defined(__SSE3__) || defined(SSE3_ENABLED)
#define _sse3_
#endif
#if defined(__SSSE3__) || defined(SSSE3_ENABLED)
#define _ssse3_
#endif
#if defined(POPCNT_ENABLED)
#define _popcnt_
#endif
#if defined(__DLL__) || defined(_DLL)
#define _dll_
#endif
// 16, 32 or 64
#if defined(__sparc_v9__) || defined(_x86_64_) || defined(_ia64_) || defined(_arm64_) || defined(_ppc64_)
#define _64_
#else
#define _32_
#endif
/* All modern 64-bit Unix systems use scheme LP64 (long, pointers are 64-bit).
* Microsoft uses a different scheme: LLP64 (long long, pointers are 64-bit).
*
* Scheme LP64 LLP64
* char 8 8
* short 16 16
* int 32 32
* long 64 32
* long long 64 64
* pointer 64 64
*/
#if defined(_32_)
#define SIZEOF_PTR 4
#elif defined(_64_)
#define SIZEOF_PTR 8
#endif
#define PLATFORM_DATA_ALIGN SIZEOF_PTR
#if !defined(SIZEOF_PTR)
#error todo
#endif
#define SIZEOF_CHAR 1
#define SIZEOF_UNSIGNED_CHAR 1
#define SIZEOF_SHORT 2
#define SIZEOF_UNSIGNED_SHORT 2
#define SIZEOF_INT 4
#define SIZEOF_UNSIGNED_INT 4
#if defined(_32_)
#define SIZEOF_LONG 4
#define SIZEOF_UNSIGNED_LONG 4
#elif defined(_64_)
#if defined(_win_)
#define SIZEOF_LONG 4
#define SIZEOF_UNSIGNED_LONG 4
#else
#define SIZEOF_LONG 8
#define SIZEOF_UNSIGNED_LONG 8
#endif // _win_
#endif // _32_
#if !defined(SIZEOF_LONG)
#error todo
#endif
#define SIZEOF_LONG_LONG 8
#define SIZEOF_UNSIGNED_LONG_LONG 8
#undef SIZEOF_SIZE_T // in case we include <Python.h> which defines it, too
#define SIZEOF_SIZE_T SIZEOF_PTR
#if defined(__INTEL_COMPILER)
#pragma warning(disable 1292)
#pragma warning(disable 1469)
#pragma warning(disable 193)
#pragma warning(disable 271)
#pragma warning(disable 383)
#pragma warning(disable 424)
#pragma warning(disable 444)
#pragma warning(disable 584)
#pragma warning(disable 593)
#pragma warning(disable 981)
#pragma warning(disable 1418)
#pragma warning(disable 304)
#pragma warning(disable 810)
#pragma warning(disable 1029)
#pragma warning(disable 1419)
#pragma warning(disable 177)
#pragma warning(disable 522)
#pragma warning(disable 858)
#pragma warning(disable 111)
#pragma warning(disable 1599)
#pragma warning(disable 411)
#pragma warning(disable 304)
#pragma warning(disable 858)
#pragma warning(disable 444)
#pragma warning(disable 913)
#pragma warning(disable 310)
#pragma warning(disable 167)
#pragma warning(disable 180)
#pragma warning(disable 1572)
#endif
#if defined(_MSC_VER)
#undef _WINSOCKAPI_
#define _WINSOCKAPI_
#undef NOMINMAX
#define NOMINMAX
#endif

View File

@ -0,0 +1,117 @@
#pragma once
// DO_NOT_STYLE
#include "platform.h"
#include <inttypes.h>
typedef int8_t i8;
typedef int16_t i16;
typedef uint8_t ui8;
typedef uint16_t ui16;
typedef int yssize_t;
#define PRIYSZT "d"
#if defined(_darwin_) && defined(_32_)
typedef unsigned long ui32;
typedef long i32;
#else
typedef uint32_t ui32;
typedef int32_t i32;
#endif
#if defined(_darwin_) && defined(_64_)
typedef unsigned long ui64;
typedef long i64;
#else
typedef uint64_t ui64;
typedef int64_t i64;
#endif
#define LL(number) INT64_C(number)
#define ULL(number) UINT64_C(number)
// Macro for size_t and ptrdiff_t types
#if defined(_32_)
# if defined(_darwin_)
# define PRISZT "lu"
# undef PRIi32
# define PRIi32 "li"
# undef SCNi32
# define SCNi32 "li"
# undef PRId32
# define PRId32 "li"
# undef SCNd32
# define SCNd32 "li"
# undef PRIu32
# define PRIu32 "lu"
# undef SCNu32
# define SCNu32 "lu"
# undef PRIx32
# define PRIx32 "lx"
# undef SCNx32
# define SCNx32 "lx"
# elif !defined(_cygwin_)
# define PRISZT PRIu32
# else
# define PRISZT "u"
# endif
# define SCNSZT SCNu32
# define PRIPDT PRIi32
# define SCNPDT SCNi32
# define PRITMT PRIi32
# define SCNTMT SCNi32
#elif defined(_64_)
# if defined(_darwin_)
# define PRISZT "lu"
# undef PRIu64
# define PRIu64 PRISZT
# undef PRIx64
# define PRIx64 "lx"
# undef PRIX64
# define PRIX64 "lX"
# undef PRId64
# define PRId64 "ld"
# undef PRIi64
# define PRIi64 "li"
# undef SCNi64
# define SCNi64 "li"
# undef SCNu64
# define SCNu64 "lu"
# undef SCNx64
# define SCNx64 "lx"
# else
# define PRISZT PRIu64
# endif
# define SCNSZT SCNu64
# define PRIPDT PRIi64
# define SCNPDT SCNi64
# define PRITMT PRIi64
# define SCNTMT SCNi64
#else
# error "Unsupported platform"
#endif
// SUPERLONG
#if !defined(DONT_USE_SUPERLONG) && !defined(SUPERLONG_MAX)
#define SUPERLONG_MAX ~LL(0)
typedef i64 SUPERLONG;
#endif
// UNICODE
// UCS-2, native byteorder
typedef ui16 wchar16;
// internal symbol type: UTF-16LE
typedef wchar16 TChar;
typedef ui32 wchar32;
#if defined(_MSC_VER)
#include <basetsd.h>
typedef SSIZE_T ssize_t;
#define HAVE_SSIZE_T 1
#include <wchar.h>
#endif
#include <sys/types.h>

2
contrib/poco vendored

@ -1 +1 @@
Subproject commit fe5505e56c27b6ecb0dcbc40c49dc2caf4e9637f
Subproject commit 29439cf7fa32c1a2d62d925bb6d6a3f14668a4a2

View File

@ -20,7 +20,7 @@ set (CONFIG_VERSION ${CMAKE_CURRENT_BINARY_DIR}/src/Common/config_version.h)
set (CONFIG_COMMON ${CMAKE_CURRENT_BINARY_DIR}/src/Common/config.h)
include (cmake/version.cmake)
message (STATUS "Will build ${VERSION_FULL} revision ${VERSION_REVISION}")
message (STATUS "Will build ${VERSION_FULL} revision ${VERSION_REVISION} ${VERSION_OFFICIAL}")
configure_file (src/Common/config.h.in ${CONFIG_COMMON})
configure_file (src/Common/config_version.h.in ${CONFIG_VERSION})
@ -155,7 +155,6 @@ if (USE_EMBEDDED_COMPILER)
target_include_directories (dbms SYSTEM BEFORE PUBLIC ${LLVM_INCLUDE_DIRS})
endif ()
if (CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE" OR CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" OR CMAKE_BUILD_TYPE_UC STREQUAL "MINSIZEREL")
# Won't generate debug info for files with heavy template instantiation to achieve faster linking and lower size.
set_source_files_properties(
@ -214,6 +213,10 @@ target_link_libraries (clickhouse_common_io
target_include_directories(clickhouse_common_io SYSTEM BEFORE PUBLIC ${RE2_INCLUDE_DIR})
if (USE_LFALLOC)
target_include_directories (clickhouse_common_io SYSTEM BEFORE PUBLIC ${LFALLOC_INCLUDE_DIR})
endif ()
if(CPUID_LIBRARY)
target_link_libraries(clickhouse_common_io PRIVATE ${CPUID_LIBRARY})
endif()

View File

@ -1,11 +1,11 @@
# This strings autochanged from release_lib.sh:
set(VERSION_REVISION 54417)
set(VERSION_REVISION 54418)
set(VERSION_MAJOR 19)
set(VERSION_MINOR 5)
set(VERSION_MINOR 6)
set(VERSION_PATCH 1)
set(VERSION_GITHASH 628ed349c335b79a441a1bd6e4bc791d61dfe62c)
set(VERSION_DESCRIBE v19.5.1.1-testing)
set(VERSION_STRING 19.5.1.1)
set(VERSION_GITHASH 30d3496c36cf3945c9828ac0b7cf7d1774a9f845)
set(VERSION_DESCRIBE v19.6.1.1-testing)
set(VERSION_STRING 19.6.1.1)
# end of autochange
set(VERSION_EXTRA "" CACHE STRING "")
@ -24,3 +24,7 @@ set (VERSION_FULL "${VERSION_NAME} ${VERSION_STRING}")
set (VERSION_SO "${VERSION_STRING}")
math (EXPR VERSION_INTEGER "${VERSION_PATCH} + ${VERSION_MINOR}*1000 + ${VERSION_MAJOR}*1000000")
if(YANDEX_OFFICIAL_BUILD)
set(VERSION_OFFICIAL " (official build)")
endif()

View File

@ -1523,7 +1523,7 @@ private:
void showClientVersion()
{
std::cout << DBMS_NAME << " client version " << VERSION_STRING << "." << std::endl;
std::cout << DBMS_NAME << " client version " << VERSION_STRING << VERSION_OFFICIAL << "." << std::endl;
}
public:

View File

@ -1,7 +1,6 @@
#include "ClusterCopier.h"
#include <chrono>
#include <Poco/Util/XMLConfiguration.h>
#include <Poco/Logger.h>
#include <Poco/ConsoleChannel.h>
@ -13,14 +12,11 @@
#include <Poco/FileChannel.h>
#include <Poco/SplitterChannel.h>
#include <Poco/Util/HelpFormatter.h>
#include <boost/algorithm/string.hpp>
#include <pcg_random.hpp>
#include <common/logger_useful.h>
#include <Common/ThreadPool.h>
#include <daemon/OwnPatternFormatter.h>
#include <Common/Exception.h>
#include <Common/ZooKeeper/ZooKeeper.h>
#include <Common/ZooKeeper/KeeperException.h>
@ -61,6 +57,7 @@
#include <DataStreams/NullBlockOutputStream.h>
#include <IO/Operators.h>
#include <IO/ReadBufferFromString.h>
#include <IO/ReadBufferFromFile.h>
#include <Functions/registerFunctions.h>
#include <TableFunctions/registerTableFunctions.h>
#include <AggregateFunctions/registerAggregateFunctions.h>
@ -500,9 +497,6 @@ static ASTPtr extractPartitionKey(const ASTPtr & storage_ast)
ErrorCodes::BAD_ARGUMENTS);
}
ASTPtr arguments_ast = engine.arguments->clone();
ASTs & arguments = arguments_ast->children;
if (isExtendedDefinitionStorage(storage_ast))
{
if (storage.partition_by)
@ -516,6 +510,12 @@ static ASTPtr extractPartitionKey(const ASTPtr & storage_ast)
bool is_replicated = startsWith(engine.name, "Replicated");
size_t min_args = is_replicated ? 3 : 1;
if (!engine.arguments)
throw Exception("Expected arguments in " + storage_str, ErrorCodes::BAD_ARGUMENTS);
ASTPtr arguments_ast = engine.arguments->clone();
ASTs & arguments = arguments_ast->children;
if (arguments.size() < min_args)
throw Exception("Expected at least " + toString(min_args) + " arguments in " + storage_str, ErrorCodes::BAD_ARGUMENTS);
@ -894,6 +894,28 @@ public:
}
}
void uploadTaskDescription(const std::string & task_path, const std::string & task_file, const bool force)
{
auto local_task_description_path = task_path + "/description";
String task_config_str;
{
ReadBufferFromFile in(task_file);
readStringUntilEOF(task_config_str, in);
}
if (task_config_str.empty())
return;
auto zookeeper = context.getZooKeeper();
zookeeper->createAncestors(local_task_description_path);
auto code = zookeeper->tryCreate(local_task_description_path, task_config_str, zkutil::CreateMode::Persistent);
if (code && force)
zookeeper->createOrUpdate(local_task_description_path, task_config_str, zkutil::CreateMode::Persistent);
LOG_DEBUG(log, "Task description " << ((code && !force) ? "not " : "") << "uploaded to " << local_task_description_path << " with result " << code << " ("<< zookeeper->error2string(code) << ")");
}
void reloadTaskDescription()
{
auto zookeeper = context.getZooKeeper();
@ -2104,6 +2126,10 @@ void ClusterCopierApp::defineOptions(Poco::Util::OptionSet & options)
options.addOption(Poco::Util::Option("task-path", "", "path to task in ZooKeeper")
.argument("task-path").binding("task-path"));
options.addOption(Poco::Util::Option("task-file", "", "path to task file for uploading in ZooKeeper to task-path")
.argument("task-file").binding("task-file"));
options.addOption(Poco::Util::Option("task-upload-force", "", "Force upload task-file even node already exists")
.argument("task-upload-force").binding("task-upload-force"));
options.addOption(Poco::Util::Option("safe-mode", "", "disables ALTER DROP PARTITION in case of errors")
.binding("safe-mode"));
options.addOption(Poco::Util::Option("copy-fault-probability", "", "the copying fails with specified probability (used to test partition state recovering)")
@ -2154,6 +2180,11 @@ void ClusterCopierApp::mainImpl()
auto copier = std::make_unique<ClusterCopier>(task_path, host_id, default_database, *context);
copier->setSafeMode(is_safe_mode);
copier->setCopyFaultProbability(copy_fault_probability);
auto task_file = config().getString("task-file", "");
if (!task_file.empty())
copier->uploadTaskDescription(task_path, task_file, config().getBool("task-upload-force", false));
copier->init();
copier->process();
}

View File

@ -369,7 +369,7 @@ void LocalServer::setupUsers()
static void showClientVersion()
{
std::cout << DBMS_NAME << " client version " << VERSION_STRING << "." << '\n';
std::cout << DBMS_NAME << " client version " << VERSION_STRING << VERSION_OFFICIAL << "." << '\n';
}
std::string LocalServer::getHelpHeader() const

View File

@ -296,7 +296,7 @@ void HTTPHandler::processQuery(
/// The client can pass a HTTP header indicating supported compression method (gzip or deflate).
String http_response_compression_methods = request.get("Accept-Encoding", "");
bool client_supports_http_compression = false;
ZlibCompressionMethod http_response_compression_method {};
CompressionMethod http_response_compression_method {};
if (!http_response_compression_methods.empty())
{
@ -305,12 +305,17 @@ void HTTPHandler::processQuery(
if (std::string::npos != http_response_compression_methods.find("gzip"))
{
client_supports_http_compression = true;
http_response_compression_method = ZlibCompressionMethod::Gzip;
http_response_compression_method = CompressionMethod::Gzip;
}
else if (std::string::npos != http_response_compression_methods.find("deflate"))
{
client_supports_http_compression = true;
http_response_compression_method = ZlibCompressionMethod::Zlib;
http_response_compression_method = CompressionMethod::Zlib;
}
else if (http_response_compression_methods == "br")
{
client_supports_http_compression = true;
http_response_compression_method = CompressionMethod::Brotli;
}
}
@ -394,11 +399,11 @@ void HTTPHandler::processQuery(
{
if (http_request_compression_method_str == "gzip")
{
in_post = std::make_unique<ZlibInflatingReadBuffer>(*in_post_raw, ZlibCompressionMethod::Gzip);
in_post = std::make_unique<ZlibInflatingReadBuffer>(*in_post_raw, CompressionMethod::Gzip);
}
else if (http_request_compression_method_str == "deflate")
{
in_post = std::make_unique<ZlibInflatingReadBuffer>(*in_post_raw, ZlibCompressionMethod::Zlib);
in_post = std::make_unique<ZlibInflatingReadBuffer>(*in_post_raw, CompressionMethod::Zlib);
}
#if USE_BROTLI
else if (http_request_compression_method_str == "br")
@ -606,7 +611,7 @@ void HTTPHandler::processQuery(
executeQuery(*in, *used_output.out_maybe_delayed_and_compressed, /* allow_into_outfile = */ false, context,
[&response] (const String & content_type) { response.setContentType(content_type); },
[&response] (const String & current_query_id) { response.add("Query-Id", current_query_id); });
[&response] (const String & current_query_id) { response.add("X-ClickHouse-Query-Id", current_query_id); });
if (used_output.hasDelayed())
{

View File

@ -132,7 +132,7 @@ int Server::run()
}
if (config().hasOption("version"))
{
std::cout << DBMS_NAME << " server version " << VERSION_STRING << "." << std::endl;
std::cout << DBMS_NAME << " server version " << VERSION_STRING << VERSION_OFFICIAL << "." << std::endl;
return 0;
}
return Application::run();

View File

@ -0,0 +1,85 @@
#include <AggregateFunctions/AggregateFunctionLeastSqr.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/FactoryHelpers.h>
namespace DB
{
namespace
{
AggregateFunctionPtr createAggregateFunctionLeastSqr(
const String & name,
const DataTypes & arguments,
const Array & params
)
{
assertNoParameters(name, params);
assertBinary(name, arguments);
const IDataType * x_arg = arguments.front().get();
WhichDataType which_x {
x_arg
};
const IDataType * y_arg = arguments.back().get();
WhichDataType which_y {
y_arg
};
#define FOR_LEASTSQR_TYPES_2(M, T) \
M(T, UInt8) \
M(T, UInt16) \
M(T, UInt32) \
M(T, UInt64) \
M(T, Int8) \
M(T, Int16) \
M(T, Int32) \
M(T, Int64) \
M(T, Float32) \
M(T, Float64)
#define FOR_LEASTSQR_TYPES(M) \
FOR_LEASTSQR_TYPES_2(M, UInt8) \
FOR_LEASTSQR_TYPES_2(M, UInt16) \
FOR_LEASTSQR_TYPES_2(M, UInt32) \
FOR_LEASTSQR_TYPES_2(M, UInt64) \
FOR_LEASTSQR_TYPES_2(M, Int8) \
FOR_LEASTSQR_TYPES_2(M, Int16) \
FOR_LEASTSQR_TYPES_2(M, Int32) \
FOR_LEASTSQR_TYPES_2(M, Int64) \
FOR_LEASTSQR_TYPES_2(M, Float32) \
FOR_LEASTSQR_TYPES_2(M, Float64)
#define DISPATCH(T1, T2) \
if (which_x.idx == TypeIndex::T1 && which_y.idx == TypeIndex::T2) \
return std::make_shared<AggregateFunctionLeastSqr<T1, T2>>( \
arguments, \
params \
);
FOR_LEASTSQR_TYPES(DISPATCH)
#undef FOR_LEASTSQR_TYPES_2
#undef FOR_LEASTSQR_TYPES
#undef DISPATCH
throw Exception(
"Illegal types ("
+ x_arg->getName() + ", " + y_arg->getName()
+ ") of arguments of aggregate function " + name
+ ", must be Native Ints, Native UInts or Floats",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT
);
}
}
void registerAggregateFunctionLeastSqr(AggregateFunctionFactory & factory)
{
factory.registerFunction("leastSqr", createAggregateFunctionLeastSqr);
}
}

View File

@ -0,0 +1,195 @@
#pragma once
#include <AggregateFunctions/IAggregateFunction.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnTuple.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeTuple.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <limits>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
template <typename X, typename Y, typename Ret>
struct AggregateFunctionLeastSqrData final
{
size_t count = 0;
Ret sum_x = 0;
Ret sum_y = 0;
Ret sum_xx = 0;
Ret sum_xy = 0;
void add(X x, Y y)
{
count += 1;
sum_x += x;
sum_y += y;
sum_xx += x * x;
sum_xy += x * y;
}
void merge(const AggregateFunctionLeastSqrData & other)
{
count += other.count;
sum_x += other.sum_x;
sum_y += other.sum_y;
sum_xx += other.sum_xx;
sum_xy += other.sum_xy;
}
void serialize(WriteBuffer & buf) const
{
writeBinary(count, buf);
writeBinary(sum_x, buf);
writeBinary(sum_y, buf);
writeBinary(sum_xx, buf);
writeBinary(sum_xy, buf);
}
void deserialize(ReadBuffer & buf)
{
readBinary(count, buf);
readBinary(sum_x, buf);
readBinary(sum_y, buf);
readBinary(sum_xx, buf);
readBinary(sum_xy, buf);
}
Ret getK() const
{
Ret divisor = sum_xx * count - sum_x * sum_x;
if (divisor == 0)
return std::numeric_limits<Ret>::quiet_NaN();
return (sum_xy * count - sum_x * sum_y) / divisor;
}
Ret getB(Ret k) const
{
if (count == 0)
return std::numeric_limits<Ret>::quiet_NaN();
return (sum_y - k * sum_x) / count;
}
};
/// Calculates simple linear regression parameters.
/// Result is a tuple (k, b) for y = k * x + b equation, solved by least squares approximation.
template <typename X, typename Y, typename Ret = Float64>
class AggregateFunctionLeastSqr final : public IAggregateFunctionDataHelper<
AggregateFunctionLeastSqrData<X, Y, Ret>,
AggregateFunctionLeastSqr<X, Y, Ret>
>
{
public:
AggregateFunctionLeastSqr(
const DataTypes & arguments,
const Array & params
):
IAggregateFunctionDataHelper<
AggregateFunctionLeastSqrData<X, Y, Ret>,
AggregateFunctionLeastSqr<X, Y, Ret>
> {arguments, params}
{
// notice: arguments has been checked before
}
String getName() const override
{
return "leastSqr";
}
const char * getHeaderFilePath() const override
{
return __FILE__;
}
void add(
AggregateDataPtr place,
const IColumn ** columns,
size_t row_num,
Arena *
) const override
{
auto col_x {
static_cast<const ColumnVector<X> *>(columns[0])
};
auto col_y {
static_cast<const ColumnVector<Y> *>(columns[1])
};
X x = col_x->getData()[row_num];
Y y = col_y->getData()[row_num];
this->data(place).add(x, y);
}
void merge(
AggregateDataPtr place,
ConstAggregateDataPtr rhs, Arena *
) const override
{
this->data(place).merge(this->data(rhs));
}
void serialize(
ConstAggregateDataPtr place,
WriteBuffer & buf
) const override
{
this->data(place).serialize(buf);
}
void deserialize(
AggregateDataPtr place,
ReadBuffer & buf, Arena *
) const override
{
this->data(place).deserialize(buf);
}
DataTypePtr getReturnType() const override
{
DataTypes types {
std::make_shared<DataTypeNumber<Ret>>(),
std::make_shared<DataTypeNumber<Ret>>(),
};
Strings names {
"k",
"b",
};
return std::make_shared<DataTypeTuple>(
std::move(types),
std::move(names)
);
}
void insertResultInto(
ConstAggregateDataPtr place,
IColumn & to
) const override
{
Ret k = this->data(place).getK();
Ret b = this->data(place).getB(k);
auto & col_tuple = static_cast<ColumnTuple &>(to);
auto & col_k = static_cast<ColumnVector<Ret> &>(col_tuple.getColumn(0));
auto & col_b = static_cast<ColumnVector<Ret> &>(col_tuple.getColumn(1));
col_k.getData().push_back(k);
col_b.getData().push_back(b);
}
};
}

View File

@ -24,8 +24,7 @@ struct WithoutOverflowPolicy
static DataTypePtr promoteType(const DataTypePtr & data_type)
{
if (!data_type->canBePromoted())
throw new Exception{"Values to be summed are expected to be Numeric, Float or Decimal.",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
throw Exception{"Values to be summed are expected to be Numeric, Float or Decimal.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
return data_type->promoteNumericType();
}

View File

@ -16,7 +16,6 @@
#include <Common/HashTable/HashSet.h>
#include <Common/HyperLogLogWithSmallSetOptimization.h>
#include <Common/CombinedCardinalityEstimator.h>
#include <Common/MemoryTracker.h>
#include <Common/typeid_cast.h>
#include <AggregateFunctions/UniquesHashSet.h>

View File

@ -29,6 +29,7 @@ void registerAggregateFunctionsBitwise(AggregateFunctionFactory &);
void registerAggregateFunctionsBitmap(AggregateFunctionFactory &);
void registerAggregateFunctionsMaxIntersections(AggregateFunctionFactory &);
void registerAggregateFunctionEntropy(AggregateFunctionFactory &);
void registerAggregateFunctionLeastSqr(AggregateFunctionFactory &);
void registerAggregateFunctionCombinatorIf(AggregateFunctionCombinatorFactory &);
void registerAggregateFunctionCombinatorArray(AggregateFunctionCombinatorFactory &);
@ -69,6 +70,7 @@ void registerAggregateFunctions()
registerAggregateFunctionHistogram(factory);
registerAggregateFunctionRetention(factory);
registerAggregateFunctionEntropy(factory);
registerAggregateFunctionLeastSqr(factory);
}
{

View File

@ -271,7 +271,7 @@ private:
void initBlockInput();
void initBlockLogsInput();
void throwUnexpectedPacket(UInt64 packet_type, const char * expected) const;
[[noreturn]] void throwUnexpectedPacket(UInt64 packet_type, const char * expected) const;
};
}

View File

@ -21,6 +21,7 @@ namespace DB
class ColumnString final : public COWPtrHelper<IColumn, ColumnString>
{
public:
using Char = UInt8;
using Chars = PaddedPODArray<UInt8>;
private:

View File

@ -10,7 +10,7 @@ namespace DB
/** Aligned piece of memory.
* It can only be allocated and destroyed.
* MemoryTracker is not used. It is intended for small pieces of memory.
* MemoryTracker is not used. AlignedBuffer is intended for small pieces of memory.
*/
class AlignedBuffer : private boost::noncopyable
{

View File

@ -1,190 +0,0 @@
#include <Common/Allocator.h>
#if !defined(__APPLE__) && !defined(__FreeBSD__)
#include <malloc.h>
#endif
#include <cstdlib>
#include <algorithm>
#include <sys/mman.h>
#include <Core/Defines.h>
#ifdef THREAD_SANITIZER
/// Thread sanitizer does not intercept mremap. The usage of mremap will lead to false positives.
#define DISABLE_MREMAP 1
#endif
#include <common/mremap.h>
#include <Common/MemoryTracker.h>
#include <Common/Exception.h>
#include <Common/formatReadable.h>
#include <IO/WriteHelpers.h>
/// Required for older Darwin builds, that lack definition of MAP_ANONYMOUS
#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int CANNOT_ALLOCATE_MEMORY;
extern const int CANNOT_MUNMAP;
extern const int CANNOT_MREMAP;
}
}
/** Many modern allocators (for example, tcmalloc) do not do a mremap for realloc,
* even in case of large enough chunks of memory.
* Although this allows you to increase performance and reduce memory consumption during realloc.
* To fix this, we do mremap manually if the chunk of memory is large enough.
* The threshold (64 MB) is chosen quite large, since changing the address space is
* very slow, especially in the case of a large number of threads.
* We expect that the set of operations mmap/something to do/mremap can only be performed about 1000 times per second.
*
* PS. This is also required, because tcmalloc can not allocate a chunk of memory greater than 16 GB.
*/
#ifdef NDEBUG
static constexpr size_t MMAP_THRESHOLD = 64 * (1ULL << 20);
#else
/// In debug build, use small mmap threshold to reproduce more memory stomping bugs.
/// Along with ASLR it will hopefully detect more issues than ASan.
/// The program may fail due to the limit on number of memory mappings.
static constexpr size_t MMAP_THRESHOLD = 4096;
#endif
static constexpr size_t MMAP_MIN_ALIGNMENT = 4096;
static constexpr size_t MALLOC_MIN_ALIGNMENT = 8;
template <bool clear_memory_>
void * Allocator<clear_memory_>::mmap_hint()
{
#if ALLOCATOR_ASLR
return reinterpret_cast<void *>(std::uniform_int_distribution<intptr_t>(0x100000000000UL, 0x700000000000UL)(rng));
#else
return nullptr;
#endif
}
template <bool clear_memory_>
void * Allocator<clear_memory_>::alloc(size_t size, size_t alignment)
{
CurrentMemoryTracker::alloc(size);
void * buf;
if (size >= MMAP_THRESHOLD)
{
if (alignment > MMAP_MIN_ALIGNMENT)
throw DB::Exception("Too large alignment " + formatReadableSizeWithBinarySuffix(alignment) + ": more than page size when allocating "
+ formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::BAD_ARGUMENTS);
buf = mmap(mmap_hint(), size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (MAP_FAILED == buf)
DB::throwFromErrno("Allocator: Cannot mmap " + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
/// No need for zero-fill, because mmap guarantees it.
}
else
{
if (alignment <= MALLOC_MIN_ALIGNMENT)
{
if (clear_memory)
buf = ::calloc(size, 1);
else
buf = ::malloc(size);
if (nullptr == buf)
DB::throwFromErrno("Allocator: Cannot malloc " + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
}
else
{
buf = nullptr;
int res = posix_memalign(&buf, alignment, size);
if (0 != res)
DB::throwFromErrno("Cannot allocate memory (posix_memalign) " + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, res);
if (clear_memory)
memset(buf, 0, size);
}
}
return buf;
}
template <bool clear_memory_>
void Allocator<clear_memory_>::free(void * buf, size_t size)
{
if (size >= MMAP_THRESHOLD)
{
if (0 != munmap(buf, size))
DB::throwFromErrno("Allocator: Cannot munmap " + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::CANNOT_MUNMAP);
}
else
{
::free(buf);
}
CurrentMemoryTracker::free(size);
}
template <bool clear_memory_>
void * Allocator<clear_memory_>::realloc(void * buf, size_t old_size, size_t new_size, size_t alignment)
{
if (old_size == new_size)
{
/// nothing to do.
/// BTW, it's not possible to change alignment while doing realloc.
}
else if (old_size < MMAP_THRESHOLD && new_size < MMAP_THRESHOLD && alignment <= MALLOC_MIN_ALIGNMENT)
{
/// Resize malloc'd memory region with no special alignment requirement.
CurrentMemoryTracker::realloc(old_size, new_size);
void * new_buf = ::realloc(buf, new_size);
if (nullptr == new_buf)
DB::throwFromErrno("Allocator: Cannot realloc from " + formatReadableSizeWithBinarySuffix(old_size) + " to " + formatReadableSizeWithBinarySuffix(new_size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
buf = new_buf;
if (clear_memory && new_size > old_size)
memset(reinterpret_cast<char *>(buf) + old_size, 0, new_size - old_size);
}
else if (old_size >= MMAP_THRESHOLD && new_size >= MMAP_THRESHOLD)
{
/// Resize mmap'd memory region.
CurrentMemoryTracker::realloc(old_size, new_size);
// On apple and freebsd self-implemented mremap used (common/mremap.h)
buf = clickhouse_mremap(buf, old_size, new_size, MREMAP_MAYMOVE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (MAP_FAILED == buf)
DB::throwFromErrno("Allocator: Cannot mremap memory chunk from " + formatReadableSizeWithBinarySuffix(old_size) + " to " + formatReadableSizeWithBinarySuffix(new_size) + ".", DB::ErrorCodes::CANNOT_MREMAP);
/// No need for zero-fill, because mmap guarantees it.
}
else
{
/// All other cases that requires a copy. MemoryTracker is called inside 'alloc', 'free' methods.
void * new_buf = alloc(new_size, alignment);
memcpy(new_buf, buf, std::min(old_size, new_size));
free(buf, old_size);
buf = new_buf;
}
return buf;
}
/// Explicit template instantiations.
template class Allocator<true>;
template class Allocator<false>;

View File

@ -10,11 +10,88 @@
#define ALLOCATOR_ASLR 1
#endif
#if ALLOCATOR_ASLR
#include <pcg_random.hpp>
#include <Common/randomSeed.h>
#include <pcg_random.hpp>
#include <Common/randomSeed.h>
#if !defined(__APPLE__) && !defined(__FreeBSD__)
#include <malloc.h>
#endif
#include <cstdlib>
#include <algorithm>
#include <sys/mman.h>
#include <Core/Defines.h>
#ifdef THREAD_SANITIZER
/// Thread sanitizer does not intercept mremap. The usage of mremap will lead to false positives.
#define DISABLE_MREMAP 1
#endif
#include <common/mremap.h>
#include <Common/MemoryTracker.h>
#include <Common/Exception.h>
#include <Common/formatReadable.h>
/// Required for older Darwin builds, that lack definition of MAP_ANONYMOUS
#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif
/** Many modern allocators (for example, tcmalloc) do not do a mremap for realloc,
* even in case of large enough chunks of memory.
* Although this allows you to increase performance and reduce memory consumption during realloc.
* To fix this, we do mremap manually if the chunk of memory is large enough.
* The threshold (64 MB) is chosen quite large, since changing the address space is
* very slow, especially in the case of a large number of threads.
* We expect that the set of operations mmap/something to do/mremap can only be performed about 1000 times per second.
*
* PS. This is also required, because tcmalloc can not allocate a chunk of memory greater than 16 GB.
*/
#ifdef NDEBUG
static constexpr size_t MMAP_THRESHOLD = 64 * (1ULL << 20);
#else
/// In debug build, use small mmap threshold to reproduce more memory stomping bugs.
/// Along with ASLR it will hopefully detect more issues than ASan.
/// The program may fail due to the limit on number of memory mappings.
static constexpr size_t MMAP_THRESHOLD = 4096;
#endif
static constexpr size_t MMAP_MIN_ALIGNMENT = 4096;
static constexpr size_t MALLOC_MIN_ALIGNMENT = 8;
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int CANNOT_ALLOCATE_MEMORY;
extern const int CANNOT_MUNMAP;
extern const int CANNOT_MREMAP;
}
}
namespace AllocatorHints
{
struct DefaultHint
{
void * mmap_hint()
{
return nullptr;
}
};
struct RandomHint
{
void * mmap_hint()
{
return reinterpret_cast<void *>(std::uniform_int_distribution<intptr_t>(0x100000000000UL, 0x700000000000UL)(rng));
}
private:
pcg64 rng{randomSeed()};
};
}
/** Responsible for allocating / freeing memory. Used, for example, in PODArray, Arena.
* Also used in hash tables.
@ -23,31 +100,126 @@
* - passing the size into the `free` method;
* - by the presence of the `alignment` argument;
* - the possibility of zeroing memory (used in hash tables);
* - hint class for mmap
* - mmap_threshold for using mmap less or more
*/
template <bool clear_memory_>
class Allocator
template <bool clear_memory_, typename Hint, size_t mmap_threshold>
class AllocatorWithHint : Hint
{
#if ALLOCATOR_ASLR
private:
pcg64 rng{randomSeed()};
#endif
void * mmap_hint();
protected:
static constexpr bool clear_memory = clear_memory_;
public:
/// Allocate memory range.
void * alloc(size_t size, size_t alignment = 0);
void * alloc(size_t size, size_t alignment = 0)
{
CurrentMemoryTracker::alloc(size);
void * buf;
if (size >= mmap_threshold)
{
if (alignment > MMAP_MIN_ALIGNMENT)
throw DB::Exception("Too large alignment " + formatReadableSizeWithBinarySuffix(alignment) + ": more than page size when allocating "
+ formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::BAD_ARGUMENTS);
buf = mmap(Hint::mmap_hint(), size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (MAP_FAILED == buf)
DB::throwFromErrno("Allocator: Cannot mmap " + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
/// No need for zero-fill, because mmap guarantees it.
}
else
{
if (alignment <= MALLOC_MIN_ALIGNMENT)
{
if constexpr (clear_memory)
buf = ::calloc(size, 1);
else
buf = ::malloc(size);
if (nullptr == buf)
DB::throwFromErrno("Allocator: Cannot malloc " + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
}
else
{
buf = nullptr;
int res = posix_memalign(&buf, alignment, size);
if (0 != res)
DB::throwFromErrno("Cannot allocate memory (posix_memalign) " + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY, res);
if (clear_memory)
memset(buf, 0, size);
}
}
return buf;
}
/// Free memory range.
void free(void * buf, size_t size);
void free(void * buf, size_t size)
{
if (size >= mmap_threshold)
{
if (0 != munmap(buf, size))
DB::throwFromErrno("Allocator: Cannot munmap " + formatReadableSizeWithBinarySuffix(size) + ".", DB::ErrorCodes::CANNOT_MUNMAP);
}
else
{
::free(buf);
}
CurrentMemoryTracker::free(size);
}
/** Enlarge memory range.
* Data from old range is moved to the beginning of new range.
* Address of memory range could change.
*/
void * realloc(void * buf, size_t old_size, size_t new_size, size_t alignment = 0);
void * realloc(void * buf, size_t old_size, size_t new_size, size_t alignment = 0)
{
if (old_size == new_size)
{
/// nothing to do.
/// BTW, it's not possible to change alignment while doing realloc.
}
else if (old_size < mmap_threshold && new_size < mmap_threshold && alignment <= MALLOC_MIN_ALIGNMENT)
{
/// Resize malloc'd memory region with no special alignment requirement.
CurrentMemoryTracker::realloc(old_size, new_size);
void * new_buf = ::realloc(buf, new_size);
if (nullptr == new_buf)
DB::throwFromErrno("Allocator: Cannot realloc from " + formatReadableSizeWithBinarySuffix(old_size) + " to " + formatReadableSizeWithBinarySuffix(new_size) + ".", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);
buf = new_buf;
if (clear_memory && new_size > old_size)
memset(reinterpret_cast<char *>(buf) + old_size, 0, new_size - old_size);
}
else if (old_size >= mmap_threshold && new_size >= mmap_threshold)
{
/// Resize mmap'd memory region.
CurrentMemoryTracker::realloc(old_size, new_size);
// On apple and freebsd self-implemented mremap used (common/mremap.h)
buf = clickhouse_mremap(buf, old_size, new_size, MREMAP_MAYMOVE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (MAP_FAILED == buf)
DB::throwFromErrno("Allocator: Cannot mremap memory chunk from " + formatReadableSizeWithBinarySuffix(old_size) + " to " + formatReadableSizeWithBinarySuffix(new_size) + ".", DB::ErrorCodes::CANNOT_MREMAP);
/// No need for zero-fill, because mmap guarantees it.
}
else
{
/// All other cases that requires a copy. MemoryTracker is called inside 'alloc', 'free' methods.
void * new_buf = alloc(new_size, alignment);
memcpy(new_buf, buf, std::min(old_size, new_size));
free(buf, old_size);
buf = new_buf;
}
return buf;
}
protected:
static constexpr size_t getStackThreshold()
@ -56,6 +228,13 @@ protected:
}
};
#if ALLOCATOR_ASLR
template <bool clear_memory>
using Allocator = AllocatorWithHint<clear_memory, AllocatorHints::RandomHint, MMAP_THRESHOLD>;
#else
template <bool clear_memory>
using Allocator = AllocatorWithHint<clear_memory, AllocatorHints::DefaultHint, MMAP_THRESHOLD>;
#endif
/** When using AllocatorWithStackMemory, located on the stack,
* GCC 4.9 mistakenly assumes that we can call `free` from a pointer to the stack.

View File

@ -49,7 +49,7 @@ private:
ProfileEvents::increment(ProfileEvents::ArenaAllocChunks);
ProfileEvents::increment(ProfileEvents::ArenaAllocBytes, size_);
begin = reinterpret_cast<char *>(Allocator::alloc(size_));
begin = reinterpret_cast<char *>(Allocator<false>::alloc(size_));
pos = begin;
end = begin + size_ - pad_right;
prev = prev_;
@ -57,7 +57,7 @@ private:
~Chunk()
{
Allocator::free(begin, size());
Allocator<false>::free(begin, size());
if (prev)
delete prev;

View File

@ -55,7 +55,7 @@ public:
char * alloc(const size_t size)
{
if (size > max_fixed_block_size)
return static_cast<char *>(Allocator::alloc(size));
return static_cast<char *>(Allocator<false>::alloc(size));
/// find list of required size
const auto list_idx = findFreeListIndex(size);
@ -76,7 +76,7 @@ public:
void free(char * ptr, const size_t size)
{
if (size > max_fixed_block_size)
return Allocator::free(ptr, size);
return Allocator<false>::free(ptr, size);
/// find list of required size
const auto list_idx = findFreeListIndex(size);

View File

@ -422,6 +422,10 @@ namespace ErrorCodes
extern const int CANNOT_MPROTECT = 445;
extern const int FUNCTION_NOT_ALLOWED = 446;
extern const int HYPERSCAN_CANNOT_SCAN_TEXT = 447;
extern const int BROTLI_READ_FAILED = 448;
extern const int BROTLI_WRITE_FAILED = 449;
extern const int BAD_TTL_EXPRESSION = 450;
extern const int BAD_TTL_FILE = 451;
extern const int KEEPER_EXCEPTION = 999;
extern const int POCO_EXCEPTION = 1000;

View File

@ -21,11 +21,6 @@ namespace ErrorCodes
extern const int CANNOT_TRUNCATE_FILE;
}
const char * getVersion()
{
return VERSION_STRING;
}
std::string errnoToString(int code, int e)
{
const size_t buf_size = 128;
@ -82,14 +77,15 @@ std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded
}
catch (const Exception & e)
{
stream << "(version " << getVersion() << ") " << getExceptionMessage(e, with_stacktrace, check_embedded_stacktrace);
stream << getExceptionMessage(e, with_stacktrace, check_embedded_stacktrace) << " (version " << VERSION_STRING << VERSION_OFFICIAL << ")";
}
catch (const Poco::Exception & e)
{
try
{
stream << "(version " << getVersion() << ") " << "Poco::Exception. Code: " << ErrorCodes::POCO_EXCEPTION << ", e.code() = " << e.code()
<< ", e.displayText() = " << e.displayText();
stream << "Poco::Exception. Code: " << ErrorCodes::POCO_EXCEPTION << ", e.code() = " << e.code()
<< ", e.displayText() = " << e.displayText()
<< " (version " << VERSION_STRING << VERSION_OFFICIAL << ")";
}
catch (...) {}
}
@ -103,7 +99,7 @@ std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded
if (status)
name += " (demangling status: " + toString(status) + ")";
stream << "(version " << getVersion() << ") " << "std::exception. Code: " << ErrorCodes::STD_EXCEPTION << ", type: " << name << ", e.what() = " << e.what();
stream << "std::exception. Code: " << ErrorCodes::STD_EXCEPTION << ", type: " << name << ", e.what() = " << e.what() << ", version = " << VERSION_STRING << VERSION_OFFICIAL;
}
catch (...) {}
}
@ -117,7 +113,7 @@ std::string getCurrentExceptionMessage(bool with_stacktrace, bool check_embedded
if (status)
name += " (demangling status: " + toString(status) + ")";
stream << "(version " << getVersion() << ") " << "Unknown exception. Code: " << ErrorCodes::UNKNOWN_EXCEPTION << ", type: " << name;
stream << "Unknown exception. Code: " << ErrorCodes::UNKNOWN_EXCEPTION << ", type: " << name << " (version " << VERSION_STRING << VERSION_OFFICIAL << ")";
}
catch (...) {}
}

View File

@ -20,7 +20,7 @@ namespace ErrorCodes
* template parameter is available as Creator
*/
template <typename CreatorFunc>
class IFactoryWithAliases
class IFactoryWithAliases : public IHints<2, IFactoryWithAliases<CreatorFunc>>
{
protected:
using Creator = CreatorFunc;
@ -76,7 +76,7 @@ public:
throw Exception(factory_name + ": alias name '" + alias_name + "' is not unique", ErrorCodes::LOGICAL_ERROR);
}
std::vector<String> getAllRegisteredNames() const
std::vector<String> getAllRegisteredNames() const override
{
std::vector<String> result;
auto getter = [](const auto & pair) { return pair.first; };
@ -106,13 +106,7 @@ public:
return aliases.count(name) || case_insensitive_aliases.count(name);
}
std::vector<String> getHints(const String & name) const
{
static const auto registered_names = getAllRegisteredNames();
return prompter.getHints(name, registered_names);
}
virtual ~IFactoryWithAliases() {}
virtual ~IFactoryWithAliases() override {}
private:
using InnerMap = std::unordered_map<String, Creator>; // name -> creator
@ -127,13 +121,6 @@ private:
/// Case insensitive aliases
AliasMap case_insensitive_aliases;
/**
* prompter for names, if a person makes a typo for some function or type, it
* helps to find best possible match (in particular, edit distance is done like in clang
* (max edit distance is (typo.size() + 2) / 3)
*/
NamePrompter</*MaxNumHints=*/2> prompter;
};
}

View File

@ -0,0 +1,53 @@
#include <Common/config.h>
#if USE_LFALLOC
#include "LFAllocator.h"
#include <cstring>
#include <lf_allocX64.h>
namespace DB
{
void * LFAllocator::alloc(size_t size, size_t alignment)
{
if (alignment == 0)
return LFAlloc(size);
else
{
void * ptr;
int res = LFPosixMemalign(&ptr, alignment, size);
return res ? nullptr : ptr;
}
}
void LFAllocator::free(void * buf, size_t)
{
LFFree(buf);
}
void * LFAllocator::realloc(void * old_ptr, size_t, size_t new_size, size_t alignment)
{
if (old_ptr == nullptr)
{
void * result = LFAllocator::alloc(new_size, alignment);
return result;
}
if (new_size == 0)
{
LFFree(old_ptr);
return nullptr;
}
void * new_ptr = LFAllocator::alloc(new_size, alignment);
if (new_ptr == nullptr)
return nullptr;
size_t old_size = LFGetSize(old_ptr);
memcpy(new_ptr, old_ptr, ((old_size < new_size) ? old_size : new_size));
LFFree(old_ptr);
return new_ptr;
}
}
#endif

View File

@ -0,0 +1,22 @@
#pragma once
#include <Common/config.h>
#if !USE_LFALLOC
#error "do not include this file until USE_LFALLOC is set to 1"
#endif
#include <cstddef>
namespace DB
{
struct LFAllocator
{
static void * alloc(size_t size, size_t alignment = 0);
static void free(void * buf, size_t);
static void * realloc(void * buf, size_t, size_t new_size, size_t alignment = 0);
};
}

View File

@ -97,4 +97,23 @@ private:
}
};
template <size_t MaxNumHints, class Self>
class IHints
{
public:
virtual std::vector<String> getAllRegisteredNames() const = 0;
std::vector<String> getHints(const String & name) const
{
static const auto registered_names = getAllRegisteredNames();
return prompter.getHints(name, registered_names);
}
virtual ~IHints() = default;
private:
NamePrompter<MaxNumHints> prompter;
};
}

View File

@ -1,48 +0,0 @@
#pragma once
#include <Common/PODArray.h>
namespace DB
{
/**
* This class is intended to push sortable data into.
* When looking up values the container ensures that it is sorted for log(N) lookup
*
* Note, this is only efficient when the insertions happen in one stage, followed by all retrievals
* This way the data only gets sorted once.
*/
template <typename T, size_t INITIAL_SIZE = 4096, typename TAllocator = Allocator<false>>
class SortedLookupPODArray : private PaddedPODArray<T, INITIAL_SIZE, TAllocator>
{
public:
using Base = PaddedPODArray<T, INITIAL_SIZE, TAllocator>;
using typename Base::PODArray;
using Base::cbegin;
using Base::cend;
template <typename U, typename ... TAllocatorParams>
void insert(U && x, TAllocatorParams &&... allocator_params)
{
Base::push_back(std::forward<U>(x), std::forward<TAllocatorParams>(allocator_params)...);
sorted = false;
}
typename Base::const_iterator upper_bound (const T& k)
{
if (!sorted)
this->sort();
return std::upper_bound(this->cbegin(), this->cend(), k);
}
private:
void sort()
{
std::sort(this->begin(), this->end());
sorted = true;
}
bool sorted = false;
};
}

View File

@ -25,6 +25,8 @@
#cmakedefine01 USE_BROTLI
#cmakedefine01 USE_SSL
#cmakedefine01 USE_HYPERSCAN
#cmakedefine01 USE_LFALLOC
#cmakedefine01 USE_LFALLOC_RANDOM_HINT
#cmakedefine01 CLICKHOUSE_SPLIT_BINARY
#cmakedefine01 LLVM_HAS_RTTI

View File

@ -20,6 +20,7 @@
#cmakedefine VERSION_MINOR @VERSION_MINOR@
#cmakedefine VERSION_PATCH @VERSION_PATCH@
#cmakedefine VERSION_STRING "@VERSION_STRING@"
#cmakedefine VERSION_OFFICIAL "@VERSION_OFFICIAL@"
#cmakedefine VERSION_FULL "@VERSION_FULL@"
#cmakedefine VERSION_DESCRIBE "@VERSION_DESCRIBE@"
#cmakedefine VERSION_GITHASH "@VERSION_GITHASH@"
@ -42,3 +43,7 @@
#else
#define DBMS_VERSION_PATCH 0
#endif
#if !defined(VERSION_OFFICIAL)
# define VERSION_OFFICIAL ""
#endif

View File

@ -35,7 +35,7 @@ bool CachedCompressedReadBuffer::nextImpl()
UInt128 key = cache->hash(path, file_pos);
owned_cell = cache->get(key);
if (!owned_cell || !codec)
if (!owned_cell)
{
/// If not, read it from the file.
initInput();
@ -49,21 +49,22 @@ bool CachedCompressedReadBuffer::nextImpl()
if (owned_cell->compressed_size)
{
owned_cell->data.resize(size_decompressed + codec->getAdditionalSizeAtTheEndOfBuffer());
owned_cell->additional_bytes = codec->getAdditionalSizeAtTheEndOfBuffer();
owned_cell->data.resize(size_decompressed + owned_cell->additional_bytes);
decompress(owned_cell->data.data(), size_decompressed, size_compressed_without_checksum);
/// Put data into cache.
cache->set(key, owned_cell);
}
/// Put data into cache.
/// NOTE: Even if we don't read anything (compressed_size == 0)
/// because we can reuse this information and don't reopen file in future
cache->set(key, owned_cell);
}
if (owned_cell->data.size() == 0)
{
owned_cell = nullptr;
return false;
}
working_buffer = Buffer(owned_cell->data.data(), owned_cell->data.data() + owned_cell->data.size() - codec->getAdditionalSizeAtTheEndOfBuffer());
working_buffer = Buffer(owned_cell->data.data(), owned_cell->data.data() + owned_cell->data.size() - owned_cell->additional_bytes);
file_pos += owned_cell->compressed_size;

View File

@ -125,19 +125,34 @@ void CompressionCodecDelta::doDecompressData(const char * source, UInt32 source_
}
}
namespace
{
UInt8 getDeltaBytesSize(DataTypePtr column_type)
{
UInt8 delta_bytes_size = 1;
if (column_type && column_type->haveMaximumSizeOfValue())
{
size_t max_size = column_type->getSizeOfValueInMemory();
if (max_size == 1 || max_size == 2 || max_size == 4 || max_size == 8)
delta_bytes_size = static_cast<UInt8>(max_size);
}
return delta_bytes_size;
}
}
void CompressionCodecDelta::useInfoAboutType(DataTypePtr data_type)
{
delta_bytes_size = getDeltaBytesSize(data_type);
}
void registerCodecDelta(CompressionCodecFactory & factory)
{
UInt8 method_code = UInt8(CompressionMethodByte::Delta);
factory.registerCompressionCodecWithType("Delta", method_code, [&](const ASTPtr & arguments, DataTypePtr column_type) -> CompressionCodecPtr
{
UInt8 delta_bytes_size = 1;
if (column_type && column_type->haveMaximumSizeOfValue())
{
size_t max_size = column_type->getSizeOfValueInMemory();
if (max_size == 1 || max_size == 2 || max_size == 4 || max_size == 8)
delta_bytes_size = static_cast<UInt8>(max_size);
}
UInt8 delta_bytes_size = getDeltaBytesSize(column_type);
if (arguments && !arguments->children.empty())
{
if (arguments->children.size() > 1)

View File

@ -14,15 +14,18 @@ public:
String getCodecDesc() const override;
void useInfoAboutType(DataTypePtr data_type) override;
protected:
UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;
void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override;
UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override { return uncompressed_size + 2; }
private:
const UInt8 delta_bytes_size;
UInt8 delta_bytes_size;
};
}

View File

@ -21,16 +21,6 @@ extern const int CORRUPTED_DATA;
CompressionCodecMultiple::CompressionCodecMultiple(Codecs codecs)
: codecs(codecs)
{
std::ostringstream ss;
for (size_t idx = 0; idx < codecs.size(); idx++)
{
if (idx != 0)
ss << ',' << ' ';
const auto codec = codecs[idx];
ss << codec->getCodecDesc();
}
codec_desc = ss.str();
}
UInt8 CompressionCodecMultiple::getMethodByte() const
@ -40,7 +30,16 @@ UInt8 CompressionCodecMultiple::getMethodByte() const
String CompressionCodecMultiple::getCodecDesc() const
{
return codec_desc;
std::ostringstream ss;
for (size_t idx = 0; idx < codecs.size(); idx++)
{
if (idx != 0)
ss << ',' << ' ';
const auto codec = codecs[idx];
ss << codec->getCodecDesc();
}
return ss.str();
}
UInt32 CompressionCodecMultiple::getMaxCompressedDataSize(UInt32 uncompressed_size) const
@ -79,6 +78,14 @@ UInt32 CompressionCodecMultiple::doCompressData(const char * source, UInt32 sour
return 1 + codecs.size() + source_size;
}
void CompressionCodecMultiple::useInfoAboutType(DataTypePtr data_type)
{
for (auto & codec : codecs)
{
codec->useInfoAboutType(data_type);
}
}
void CompressionCodecMultiple::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 decompressed_size) const
{
UInt8 compression_methods_size = source[0];

View File

@ -17,6 +17,8 @@ public:
UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override;
void useInfoAboutType(DataTypePtr data_type) override;
protected:
UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;
@ -24,7 +26,6 @@ protected:
private:
Codecs codecs;
String codec_desc;
};

View File

@ -58,6 +58,9 @@ public:
/// Read method byte from compressed source
static UInt8 readMethod(const char * source);
/// Some codecs may use information about column type which appears after codec creation
virtual void useInfoAboutType(DataTypePtr /* data_type */) { }
protected:
/// Return size of compressed data without header

View File

@ -23,20 +23,21 @@ namespace DB
class TaskNotification final : public Poco::Notification
{
public:
explicit TaskNotification(const BackgroundSchedulePool::TaskInfoPtr & task) : task(task) {}
explicit TaskNotification(const BackgroundSchedulePoolTaskInfoPtr & task) : task(task) {}
void execute() { task->execute(); }
private:
BackgroundSchedulePool::TaskInfoPtr task;
BackgroundSchedulePoolTaskInfoPtr task;
};
BackgroundSchedulePool::TaskInfo::TaskInfo(BackgroundSchedulePool & pool_, const std::string & log_name_, const TaskFunc & function_)
: pool(pool_) , log_name(log_name_) , function(function_)
BackgroundSchedulePoolTaskInfo::BackgroundSchedulePoolTaskInfo(
BackgroundSchedulePool & pool_, const std::string & log_name_, const BackgroundSchedulePool::TaskFunc & function_)
: pool(pool_), log_name(log_name_), function(function_)
{
}
bool BackgroundSchedulePool::TaskInfo::schedule()
bool BackgroundSchedulePoolTaskInfo::schedule()
{
std::lock_guard lock(schedule_mutex);
@ -47,7 +48,7 @@ bool BackgroundSchedulePool::TaskInfo::schedule()
return true;
}
bool BackgroundSchedulePool::TaskInfo::scheduleAfter(size_t ms)
bool BackgroundSchedulePoolTaskInfo::scheduleAfter(size_t ms)
{
std::lock_guard lock(schedule_mutex);
@ -58,7 +59,7 @@ bool BackgroundSchedulePool::TaskInfo::scheduleAfter(size_t ms)
return true;
}
void BackgroundSchedulePool::TaskInfo::deactivate()
void BackgroundSchedulePoolTaskInfo::deactivate()
{
std::lock_guard lock_exec(exec_mutex);
std::lock_guard lock_schedule(schedule_mutex);
@ -73,13 +74,13 @@ void BackgroundSchedulePool::TaskInfo::deactivate()
pool.cancelDelayedTask(shared_from_this(), lock_schedule);
}
void BackgroundSchedulePool::TaskInfo::activate()
void BackgroundSchedulePoolTaskInfo::activate()
{
std::lock_guard lock(schedule_mutex);
deactivated = false;
}
bool BackgroundSchedulePool::TaskInfo::activateAndSchedule()
bool BackgroundSchedulePoolTaskInfo::activateAndSchedule()
{
std::lock_guard lock(schedule_mutex);
@ -91,7 +92,7 @@ bool BackgroundSchedulePool::TaskInfo::activateAndSchedule()
return true;
}
void BackgroundSchedulePool::TaskInfo::execute()
void BackgroundSchedulePoolTaskInfo::execute()
{
Stopwatch watch;
CurrentMetrics::Increment metric_increment{CurrentMetrics::BackgroundSchedulePoolTask};
@ -131,7 +132,7 @@ void BackgroundSchedulePool::TaskInfo::execute()
}
}
void BackgroundSchedulePool::TaskInfo::scheduleImpl(std::lock_guard<std::mutex> & schedule_mutex_lock)
void BackgroundSchedulePoolTaskInfo::scheduleImpl(std::lock_guard<std::mutex> & schedule_mutex_lock)
{
scheduled = true;
@ -145,7 +146,7 @@ void BackgroundSchedulePool::TaskInfo::scheduleImpl(std::lock_guard<std::mutex>
pool.queue.enqueueNotification(new TaskNotification(shared_from_this()));
}
Coordination::WatchCallback BackgroundSchedulePool::TaskInfo::getWatchCallback()
Coordination::WatchCallback BackgroundSchedulePoolTaskInfo::getWatchCallback()
{
return [t = shared_from_this()](const Coordination::WatchResponse &)
{

View File

@ -20,6 +20,8 @@ namespace DB
{
class TaskNotification;
class BackgroundSchedulePoolTaskInfo;
class BackgroundSchedulePoolTaskHolder;
/** Executes functions scheduled at a specific point in time.
@ -35,84 +37,14 @@ class TaskNotification;
class BackgroundSchedulePool
{
public:
class TaskInfo;
friend class BackgroundSchedulePoolTaskInfo;
using TaskInfo = BackgroundSchedulePoolTaskInfo;
using TaskInfoPtr = std::shared_ptr<TaskInfo>;
using TaskFunc = std::function<void()>;
using TaskHolder = BackgroundSchedulePoolTaskHolder;
using DelayedTasks = std::multimap<Poco::Timestamp, TaskInfoPtr>;
class TaskInfo : public std::enable_shared_from_this<TaskInfo>, private boost::noncopyable
{
public:
TaskInfo(BackgroundSchedulePool & pool_, const std::string & log_name_, const TaskFunc & function_);
/// Schedule for execution as soon as possible (if not already scheduled).
/// If the task was already scheduled with delay, the delay will be ignored.
bool schedule();
/// Schedule for execution after specified delay.
bool scheduleAfter(size_t ms);
/// Further attempts to schedule become no-op. Will wait till the end of the current execution of the task.
void deactivate();
void activate();
/// Atomically activate task and schedule it for execution.
bool activateAndSchedule();
/// get Coordination::WatchCallback needed for notifications from ZooKeeper watches.
Coordination::WatchCallback getWatchCallback();
private:
friend class TaskNotification;
friend class BackgroundSchedulePool;
void execute();
void scheduleImpl(std::lock_guard<std::mutex> & schedule_mutex_lock);
BackgroundSchedulePool & pool;
std::string log_name;
TaskFunc function;
std::mutex exec_mutex;
std::mutex schedule_mutex;
/// Invariants:
/// * If deactivated is true then scheduled, delayed and executing are all false.
/// * scheduled and delayed cannot be true at the same time.
bool deactivated = false;
bool scheduled = false;
bool delayed = false;
bool executing = false;
/// If the task is scheduled with delay, points to element of delayed_tasks.
DelayedTasks::iterator iterator;
};
class TaskHolder
{
public:
TaskHolder() = default;
explicit TaskHolder(const TaskInfoPtr & task_info_) : task_info(task_info_) {}
TaskHolder(const TaskHolder & other) = delete;
TaskHolder(TaskHolder && other) noexcept = default;
TaskHolder & operator=(const TaskHolder & other) noexcept = delete;
TaskHolder & operator=(TaskHolder && other) noexcept = default;
~TaskHolder()
{
if (task_info)
task_info->deactivate();
}
TaskInfo * operator->() { return task_info.get(); }
const TaskInfo * operator->() const { return task_info.get(); }
private:
TaskInfoPtr task_info;
};
TaskHolder createTask(const std::string & log_name, const TaskFunc & function);
size_t getNumberOfThreads() const { return size; }
@ -153,4 +85,81 @@ private:
void attachToThreadGroup();
};
class BackgroundSchedulePoolTaskInfo : public std::enable_shared_from_this<BackgroundSchedulePoolTaskInfo>, private boost::noncopyable
{
public:
BackgroundSchedulePoolTaskInfo(BackgroundSchedulePool & pool_, const std::string & log_name_, const BackgroundSchedulePool::TaskFunc & function_);
/// Schedule for execution as soon as possible (if not already scheduled).
/// If the task was already scheduled with delay, the delay will be ignored.
bool schedule();
/// Schedule for execution after specified delay.
bool scheduleAfter(size_t ms);
/// Further attempts to schedule become no-op. Will wait till the end of the current execution of the task.
void deactivate();
void activate();
/// Atomically activate task and schedule it for execution.
bool activateAndSchedule();
/// get Coordination::WatchCallback needed for notifications from ZooKeeper watches.
Coordination::WatchCallback getWatchCallback();
private:
friend class TaskNotification;
friend class BackgroundSchedulePool;
void execute();
void scheduleImpl(std::lock_guard<std::mutex> & schedule_mutex_lock);
BackgroundSchedulePool & pool;
std::string log_name;
BackgroundSchedulePool::TaskFunc function;
std::mutex exec_mutex;
std::mutex schedule_mutex;
/// Invariants:
/// * If deactivated is true then scheduled, delayed and executing are all false.
/// * scheduled and delayed cannot be true at the same time.
bool deactivated = false;
bool scheduled = false;
bool delayed = false;
bool executing = false;
/// If the task is scheduled with delay, points to element of delayed_tasks.
BackgroundSchedulePool::DelayedTasks::iterator iterator;
};
using BackgroundSchedulePoolTaskInfoPtr = std::shared_ptr<BackgroundSchedulePoolTaskInfo>;
class BackgroundSchedulePoolTaskHolder
{
public:
BackgroundSchedulePoolTaskHolder() = default;
explicit BackgroundSchedulePoolTaskHolder(const BackgroundSchedulePoolTaskInfoPtr & task_info_) : task_info(task_info_) {}
BackgroundSchedulePoolTaskHolder(const BackgroundSchedulePoolTaskHolder & other) = delete;
BackgroundSchedulePoolTaskHolder(BackgroundSchedulePoolTaskHolder && other) noexcept = default;
BackgroundSchedulePoolTaskHolder & operator=(const BackgroundSchedulePoolTaskHolder & other) noexcept = delete;
BackgroundSchedulePoolTaskHolder & operator=(BackgroundSchedulePoolTaskHolder && other) noexcept = default;
~BackgroundSchedulePoolTaskHolder()
{
if (task_info)
task_info->deactivate();
}
BackgroundSchedulePoolTaskInfo * operator->() { return task_info.get(); }
const BackgroundSchedulePoolTaskInfo * operator->() const { return task_info.get(); }
private:
BackgroundSchedulePoolTaskInfoPtr task_info;
};
}

View File

@ -5,8 +5,6 @@
#include <DataStreams/IBlockInputStream.h>
#include <Common/CurrentMetrics.h>
#include <Common/ThreadPool.h>
#include <Common/MemoryTracker.h>
#include <Poco/Ext/ThreadNumber.h>
namespace CurrentMetrics

View File

@ -43,6 +43,9 @@ struct BlockIO
BlockIO & operator= (const BlockIO & rhs)
{
if (this == &rhs)
return *this;
out.reset();
in.reset();
process_list_entry.reset();

View File

@ -6,6 +6,10 @@
#include <IO/WriteHelpers.h>
#include <Common/PODArray.h>
#include <Common/config.h>
#if USE_LFALLOC
#include <Common/LFAllocator.h>
#endif
namespace DB
{
@ -33,7 +37,9 @@ struct MarkInCompressedFile
return "(" + DB::toString(offset_in_compressed_file) + "," + DB::toString(offset_in_decompressed_block) + ")";
}
};
#if USE_LFALLOC
using MarksInCompressedFile = PODArray<MarkInCompressedFile, 4096, LFAllocator>;
#else
using MarksInCompressedFile = PODArray<MarkInCompressedFile>;
#endif
}

View File

@ -1,7 +1,6 @@
#include <future>
#include <Common/setThreadName.h>
#include <Common/CurrentMetrics.h>
#include <Common/MemoryTracker.h>
#include <DataStreams/MergingAggregatedMemoryEfficientBlockInputStream.h>
#include <Common/CurrentThread.h>

View File

@ -8,8 +8,6 @@
#include <condition_variable>
class MemoryTracker;
namespace DB
{

View File

@ -11,7 +11,6 @@
#include <DataStreams/IBlockInputStream.h>
#include <Common/setThreadName.h>
#include <Common/CurrentMetrics.h>
#include <Common/MemoryTracker.h>
#include <Common/CurrentThread.h>
#include <Common/ThreadPool.h>

View File

@ -0,0 +1,208 @@
#include <DataStreams/TTLBlockInputStream.h>
#include <DataTypes/DataTypeDate.h>
#include <Interpreters/evaluateMissingDefaults.h>
#include <Interpreters/SyntaxAnalyzer.h>
#include <Interpreters/ExpressionAnalyzer.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
TTLBlockInputStream::TTLBlockInputStream(
const BlockInputStreamPtr & input_,
const MergeTreeData & storage_,
const MergeTreeData::MutableDataPartPtr & data_part_,
time_t current_time_)
: storage(storage_)
, data_part(data_part_)
, current_time(current_time_)
, old_ttl_infos(data_part->ttl_infos)
, log(&Logger::get(storage.getLogName() + " (TTLBlockInputStream)"))
, date_lut(DateLUT::instance())
{
children.push_back(input_);
const auto & column_defaults = storage.getColumns().getDefaults();
ASTPtr default_expr_list = std::make_shared<ASTExpressionList>();
for (const auto & [name, ttl_info] : old_ttl_infos.columns_ttl)
{
if (ttl_info.min <= current_time)
{
new_ttl_infos.columns_ttl.emplace(name, MergeTreeDataPart::TTLInfo{});
empty_columns.emplace(name);
auto it = column_defaults.find(name);
if (it != column_defaults.end())
default_expr_list->children.emplace_back(
setAlias(it->second.expression, it->first));
}
else
new_ttl_infos.columns_ttl.emplace(name, ttl_info);
}
if (old_ttl_infos.table_ttl.min > current_time)
new_ttl_infos.table_ttl = old_ttl_infos.table_ttl;
if (!default_expr_list->children.empty())
{
auto syntax_result = SyntaxAnalyzer(storage.global_context).analyze(
default_expr_list, storage.getColumns().getAllPhysical());
defaults_expression = ExpressionAnalyzer{default_expr_list, syntax_result, storage.global_context}.getActions(true);
}
}
Block TTLBlockInputStream::getHeader() const
{
return children.at(0)->getHeader();
}
Block TTLBlockInputStream::readImpl()
{
Block block = children.at(0)->read();
if (!block)
return block;
if (storage.hasTableTTL())
{
/// Skip all data if table ttl is expired for part
if (old_ttl_infos.table_ttl.max <= current_time)
{
rows_removed = data_part->rows_count;
return {};
}
if (old_ttl_infos.table_ttl.min <= current_time)
removeRowsWithExpiredTableTTL(block);
}
removeValuesWithExpiredColumnTTL(block);
return block;
}
void TTLBlockInputStream::readSuffixImpl()
{
for (const auto & elem : new_ttl_infos.columns_ttl)
new_ttl_infos.updatePartMinTTL(elem.second.min);
new_ttl_infos.updatePartMinTTL(new_ttl_infos.table_ttl.min);
data_part->ttl_infos = std::move(new_ttl_infos);
data_part->empty_columns = std::move(empty_columns);
if (rows_removed)
LOG_INFO(log, "Removed " << rows_removed << " rows with expired ttl from part " << data_part->name);
}
void TTLBlockInputStream::removeRowsWithExpiredTableTTL(Block & block)
{
storage.ttl_table_entry.expression->execute(block);
const auto & current = block.getByName(storage.ttl_table_entry.result_column);
const IColumn * ttl_column = current.column.get();
MutableColumns result_columns;
result_columns.reserve(getHeader().columns());
for (const auto & name : storage.getColumns().getNamesOfPhysical())
{
auto & column_with_type = block.getByName(name);
const IColumn * values_column = column_with_type.column.get();
MutableColumnPtr result_column = values_column->cloneEmpty();
result_column->reserve(block.rows());
for (size_t i = 0; i < block.rows(); ++i)
{
UInt32 cur_ttl = getTimestampByIndex(ttl_column, i);
if (cur_ttl > current_time)
{
new_ttl_infos.table_ttl.update(cur_ttl);
result_column->insertFrom(*values_column, i);
}
else
++rows_removed;
}
result_columns.emplace_back(std::move(result_column));
}
block = getHeader().cloneWithColumns(std::move(result_columns));
}
void TTLBlockInputStream::removeValuesWithExpiredColumnTTL(Block & block)
{
Block block_with_defaults;
if (defaults_expression)
{
block_with_defaults = block;
defaults_expression->execute(block_with_defaults);
}
for (const auto & [name, ttl_entry] : storage.ttl_entries_by_name)
{
const auto & old_ttl_info = old_ttl_infos.columns_ttl[name];
auto & new_ttl_info = new_ttl_infos.columns_ttl[name];
if (old_ttl_info.min > current_time)
continue;
if (old_ttl_info.max <= current_time)
continue;
if (!block.has(ttl_entry.result_column))
ttl_entry.expression->execute(block);
ColumnPtr default_column = nullptr;
if (block_with_defaults.has(name))
default_column = block_with_defaults.getByName(name).column->convertToFullColumnIfConst();
auto & column_with_type = block.getByName(name);
const IColumn * values_column = column_with_type.column.get();
MutableColumnPtr result_column = values_column->cloneEmpty();
result_column->reserve(block.rows());
const auto & current = block.getByName(ttl_entry.result_column);
const IColumn * ttl_column = current.column.get();
for (size_t i = 0; i < block.rows(); ++i)
{
UInt32 cur_ttl = getTimestampByIndex(ttl_column, i);
if (cur_ttl <= current_time)
{
if (default_column)
result_column->insertFrom(*default_column, i);
else
result_column->insertDefault();
}
else
{
new_ttl_info.update(cur_ttl);
empty_columns.erase(name);
result_column->insertFrom(*values_column, i);
}
}
column_with_type.column = std::move(result_column);
}
for (const auto & elem : storage.ttl_entries_by_name)
if (block.has(elem.second.result_column))
block.erase(elem.second.result_column);
}
UInt32 TTLBlockInputStream::getTimestampByIndex(const IColumn * column, size_t ind)
{
if (const ColumnUInt16 * column_date = typeid_cast<const ColumnUInt16 *>(column))
return date_lut.fromDayNum(DayNum(column_date->getData()[ind]));
else if (const ColumnUInt32 * column_date_time = typeid_cast<const ColumnUInt32 *>(column))
return column_date_time->getData()[ind];
else
throw Exception("Unexpected type of result ttl column", ErrorCodes::LOGICAL_ERROR);
}
}

View File

@ -0,0 +1,60 @@
#pragma once
#include <DataStreams/IBlockInputStream.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/MergeTree/MergeTreeDataPart.h>
#include <Core/Block.h>
#include <common/DateLUT.h>
namespace DB
{
class TTLBlockInputStream : public IBlockInputStream
{
public:
TTLBlockInputStream(
const BlockInputStreamPtr & input_,
const MergeTreeData & storage_,
const MergeTreeData::MutableDataPartPtr & data_part_,
time_t current_time
);
String getName() const override { return "TTLBlockInputStream"; }
Block getHeader() const override;
protected:
Block readImpl() override;
/// Finalizes ttl infos and updates data part
void readSuffixImpl() override;
private:
const MergeTreeData & storage;
/// ttl_infos and empty_columns are updating while reading
const MergeTreeData::MutableDataPartPtr & data_part;
time_t current_time;
MergeTreeDataPart::TTLInfos old_ttl_infos;
MergeTreeDataPart::TTLInfos new_ttl_infos;
NameSet empty_columns;
size_t rows_removed = 0;
Logger * log;
DateLUTImpl date_lut;
std::unordered_map<String, String> defaults_result_column;
ExpressionActionsPtr defaults_expression;
private:
/// Removes values with expired ttl and computes new min_ttl and empty_columns for part
void removeValuesWithExpiredColumnTTL(Block & block);
/// Remove rows with expired table ttl and computes new min_ttl for part
void removeRowsWithExpiredTableTTL(Block & block);
UInt32 getTimestampByIndex(const IColumn * column, size_t ind);
};
}

View File

@ -20,7 +20,7 @@ namespace ErrorCodes
namespace
{
class DataTypeDomanIPv4 : public DataTypeDomainWithSimpleSerialization
class DataTypeDomainIPv4 : public DataTypeDomainWithSimpleSerialization
{
public:
const char * getName() const override
@ -63,7 +63,7 @@ public:
}
};
class DataTypeDomanIPv6 : public DataTypeDomainWithSimpleSerialization
class DataTypeDomainIPv6 : public DataTypeDomainWithSimpleSerialization
{
public:
const char * getName() const override
@ -111,8 +111,8 @@ public:
void registerDataTypeDomainIPv4AndIPv6(DataTypeFactory & factory)
{
factory.registerDataTypeDomain("UInt32", std::make_unique<DataTypeDomanIPv4>());
factory.registerDataTypeDomain("FixedString(16)", std::make_unique<DataTypeDomanIPv6>());
factory.registerDataTypeDomain("UInt32", std::make_unique<DataTypeDomainIPv4>());
factory.registerDataTypeDomain("FixedString(16)", std::make_unique<DataTypeDomainIPv6>());
}
} // namespace DB

View File

@ -690,10 +690,9 @@ void DataTypeLowCardinality::deserializeBinaryBulkWithMultipleStreams(
};
if (!settings.continuous_reading)
{
low_cardinality_state->num_pending_rows = 0;
if (!settings.continuous_reading)
{
/// Remember in state that some granules were skipped and we need to update dictionary.
low_cardinality_state->need_update_dictionary = true;
}

View File

@ -14,7 +14,7 @@ namespace DB
class IDataTypeDummy : public DataTypeWithSimpleSerialization
{
private:
void throwNoSerialization() const
[[noreturn]] void throwNoSerialization() const
{
throw Exception("Serialization is not implemented for data type " + getName(), ErrorCodes::NOT_IMPLEMENTED);
}

View File

@ -77,7 +77,7 @@ namespace ProtobufColumnMatcher
namespace details
{
void throwNoCommonColumns();
[[noreturn]] void throwNoCommonColumns();
class ColumnNameMatcher
{

View File

@ -385,73 +385,61 @@ public:
bool readStringInto(PaddedPODArray<UInt8> &) override
{
cannotConvertType("String");
return false;
}
bool readInt8(Int8 &) override
{
cannotConvertType("Int8");
return false;
}
bool readUInt8(UInt8 &) override
{
cannotConvertType("UInt8");
return false;
}
bool readInt16(Int16 &) override
{
cannotConvertType("Int16");
return false;
}
bool readUInt16(UInt16 &) override
{
cannotConvertType("UInt16");
return false;
}
bool readInt32(Int32 &) override
{
cannotConvertType("Int32");
return false;
}
bool readUInt32(UInt32 &) override
{
cannotConvertType("UInt32");
return false;
}
bool readInt64(Int64 &) override
{
cannotConvertType("Int64");
return false;
}
bool readUInt64(UInt64 &) override
{
cannotConvertType("UInt64");
return false;
}
bool readUInt128(UInt128 &) override
{
cannotConvertType("UInt128");
return false;
}
bool readFloat32(Float32 &) override
{
cannotConvertType("Float32");
return false;
}
bool readFloat64(Float64 &) override
{
cannotConvertType("Float64");
return false;
}
void prepareEnumMapping8(const std::vector<std::pair<std::string, Int8>> &) override {}
@ -460,59 +448,50 @@ public:
bool readEnum8(Int8 &) override
{
cannotConvertType("Enum");
return false;
}
bool readEnum16(Int16 &) override
{
cannotConvertType("Enum");
return false;
}
bool readUUID(UUID &) override
{
cannotConvertType("UUID");
return false;
}
bool readDate(DayNum &) override
{
cannotConvertType("Date");
return false;
}
bool readDateTime(time_t &) override
{
cannotConvertType("DateTime");
return false;
}
bool readDecimal32(Decimal32 &, UInt32, UInt32) override
{
cannotConvertType("Decimal32");
return false;
}
bool readDecimal64(Decimal64 &, UInt32, UInt32) override
{
cannotConvertType("Decimal64");
return false;
}
bool readDecimal128(Decimal128 &, UInt32, UInt32) override
{
cannotConvertType("Decimal128");
return false;
}
bool readAggregateFunction(const AggregateFunctionPtr &, AggregateDataPtr, Arena &) override
{
cannotConvertType("AggregateFunction");
return false;
}
protected:
void cannotConvertType(const String & type_name)
[[noreturn]] void cannotConvertType(const String & type_name)
{
throw Exception(
String("Could not convert type '") + field->type_name() + "' from protobuf field '" + field->name() + "' to data type '"
@ -520,7 +499,7 @@ protected:
ErrorCodes::PROTOBUF_BAD_CAST);
}
void cannotConvertValue(const String & value, const String & type_name)
[[noreturn]] void cannotConvertValue(const String & value, const String & type_name)
{
throw Exception(
"Could not convert value '" + value + "' from protobuf field '" + field->name() + "' to data type '" + type_name + "'",
@ -557,7 +536,6 @@ protected:
catch (...)
{
cannotConvertValue(StringRef(str.data(), str.size()).toString(), TypeName<To>::get());
__builtin_unreachable();
}
}

View File

@ -334,14 +334,14 @@ public:
virtual void writeAggregateFunction(const AggregateFunctionPtr &, ConstAggregateDataPtr) override { cannotConvertType("AggregateFunction"); }
protected:
void cannotConvertType(const String & type_name)
[[noreturn]] void cannotConvertType(const String & type_name)
{
throw Exception(
"Could not convert data type '" + type_name + "' to protobuf type '" + field->type_name() + "' (field: " + field->name() + ")",
ErrorCodes::PROTOBUF_BAD_CAST);
}
void cannotConvertValue(const String & value)
[[noreturn]] void cannotConvertValue(const String & value)
{
throw Exception(
"Could not convert value '" + value + "' to protobuf type '" + field->type_name() + "' (field: " + field->name() + ")",

View File

@ -423,7 +423,7 @@ inline bool tryParseImpl<DataTypeDateTime>(DataTypeDateTime::FieldType & x, Read
/** Throw exception with verbose message when string value is not parsed completely.
*/
void throwExceptionForIncompletelyParsedValue(ReadBuffer & read_buffer, Block & block, size_t result);
[[noreturn]] void throwExceptionForIncompletelyParsedValue(ReadBuffer & read_buffer, Block & block, size_t result);
enum class ConvertFromStringExceptionMode

View File

@ -520,7 +520,7 @@ void resizeDynamicSize(ArraySource && array_source, ValueSource && value_source,
while (!sink.isEnd())
{
size_t row_num = array_source.rowNum();
bool has_size = !size_null_map || (size_null_map && (*size_null_map)[row_num]);
bool has_size = !size_null_map || (*size_null_map)[row_num];
if (has_size)
{

View File

@ -8,12 +8,14 @@
#include <Columns/ColumnNullable.h>
#include <Common/typeid_cast.h>
#include <Common/UTF8Helpers.h>
#include <Functions/GatherUtils/IArraySource.h>
#include <Functions/GatherUtils/IValueSource.h>
#include <Functions/GatherUtils/Slices.h>
#include <Functions/FunctionHelpers.h>
namespace DB
{
@ -276,6 +278,92 @@ struct StringSource
};
/// Differs to StringSource by having 'offest' and 'length' in code points instead of bytes in getSlice* methods.
/** NOTE: The behaviour of substring and substringUTF8 is inconsistent when negative offset is greater than string size:
* substring:
* hello
* ^-----^ - offset -10, length 7, result: "he"
* substringUTF8:
* hello
* ^-----^ - offset -10, length 7, result: "hello"
* This may be subject for change.
*/
struct UTF8StringSource : public StringSource
{
using StringSource::StringSource;
static const ColumnString::Char * skipCodePointsForward(const ColumnString::Char * pos, size_t size, const ColumnString::Char * end)
{
for (size_t i = 0; i < size && pos < end; ++i)
pos += UTF8::seqLength(*pos); /// NOTE pos may become greater than end. It is Ok due to padding in PaddedPODArray.
return pos;
}
static const ColumnString::Char * skipCodePointsBackward(const ColumnString::Char * pos, size_t size, const ColumnString::Char * begin)
{
for (size_t i = 0; i < size && pos > begin; ++i)
{
--pos;
if (pos == begin)
break;
UTF8::syncBackward(pos, begin);
}
return pos;
}
Slice getSliceFromLeft(size_t offset) const
{
auto begin = &elements[prev_offset];
auto end = elements.data() + offsets[row_num] - 1;
auto res_begin = skipCodePointsForward(begin, offset, end);
if (res_begin >= end)
return {begin, 0};
return {res_begin, size_t(end - res_begin)};
}
Slice getSliceFromLeft(size_t offset, size_t length) const
{
auto begin = &elements[prev_offset];
auto end = elements.data() + offsets[row_num] - 1;
auto res_begin = skipCodePointsForward(begin, offset, end);
if (res_begin >= end)
return {begin, 0};
auto res_end = skipCodePointsForward(res_begin, length, end);
if (res_end >= end)
return {res_begin, size_t(end - res_begin)};
return {res_begin, size_t(res_end - res_begin)};
}
Slice getSliceFromRight(size_t offset) const
{
auto begin = &elements[prev_offset];
auto end = elements.data() + offsets[row_num] - 1;
auto res_begin = skipCodePointsBackward(end, offset, begin);
return {res_begin, size_t(end - res_begin)};
}
Slice getSliceFromRight(size_t offset, size_t length) const
{
auto begin = &elements[prev_offset];
auto end = elements.data() + offsets[row_num] - 1;
auto res_begin = skipCodePointsBackward(end, offset, begin);
auto res_end = skipCodePointsForward(res_begin, length, end);
if (res_end >= end)
return {res_begin, size_t(end - res_begin)};
return {res_begin, size_t(res_end - res_begin)};
}
};
struct FixedStringSource
{
using Slice = NumericArraySlice<UInt8>;

View File

@ -432,15 +432,20 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable
current_has_nullable = true;
else
{
typename Map::mapped_type * value = nullptr;
if constexpr (is_numeric_column)
++map[columns[arg]->getElement(i)];
value = &map[columns[arg]->getElement(i)];
else if constexpr (std::is_same<ColumnType, ColumnString>::value || std::is_same<ColumnType, ColumnFixedString>::value)
++map[columns[arg]->getDataAt(i)];
value = &map[columns[arg]->getDataAt(i)];
else
{
const char * data = nullptr;
++map[columns[arg]->serializeValueIntoArena(i, arena, data)];
value = &map[columns[arg]->serializeValueIntoArena(i, arena, data)];
}
if (*value == arg)
++(*value);
}
}

View File

@ -61,7 +61,7 @@ public:
{
auto array_size = col_num->getInt(i);
if (unlikely(array_size) < 0)
if (unlikely(array_size < 0))
throw Exception("Array size cannot be negative: while executing function " + getName(), ErrorCodes::TOO_LARGE_ARRAY_SIZE);
offset += array_size;

View File

@ -153,7 +153,7 @@ template <typename A, typename B>
struct NumIfImpl<A, B, NumberTraits::Error>
{
private:
static void throw_error()
[[noreturn]] static void throw_error()
{
throw Exception("Internal logic error: invalid types of arguments 2 and 3 of if", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}

View File

@ -0,0 +1,330 @@
#include <DataTypes/DataTypeString.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionStringOrArrayToT.h>
#include <cstring>
#ifdef __SSE4_1__
# include <emmintrin.h>
# include <smmintrin.h>
# include <tmmintrin.h>
#endif
namespace DB
{
/// inspired by https://github.com/cyb70289/utf8/
struct ValidUTF8Impl
{
/*
MIT License
Copyright (c) 2019 Yibo Cai
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
/*
* http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94
*
* Table 3-7. Well-Formed UTF-8 Byte Sequences
*
* +--------------------+------------+-------------+------------+-------------+
* | Code Points | First Byte | Second Byte | Third Byte | Fourth Byte |
* +--------------------+------------+-------------+------------+-------------+
* | U+0000..U+007F | 00..7F | | | |
* +--------------------+------------+-------------+------------+-------------+
* | U+0080..U+07FF | C2..DF | 80..BF | | |
* +--------------------+------------+-------------+------------+-------------+
* | U+0800..U+0FFF | E0 | A0..BF | 80..BF | |
* +--------------------+------------+-------------+------------+-------------+
* | U+1000..U+CFFF | E1..EC | 80..BF | 80..BF | |
* +--------------------+------------+-------------+------------+-------------+
* | U+D000..U+D7FF | ED | 80..9F | 80..BF | |
* +--------------------+------------+-------------+------------+-------------+
* | U+E000..U+FFFF | EE..EF | 80..BF | 80..BF | |
* +--------------------+------------+-------------+------------+-------------+
* | U+10000..U+3FFFF | F0 | 90..BF | 80..BF | 80..BF |
* +--------------------+------------+-------------+------------+-------------+
* | U+40000..U+FFFFF | F1..F3 | 80..BF | 80..BF | 80..BF |
* +--------------------+------------+-------------+------------+-------------+
* | U+100000..U+10FFFF | F4 | 80..8F | 80..BF | 80..BF |
* +--------------------+------------+-------------+------------+-------------+
*/
static inline UInt8 isValidUTF8Naive(const UInt8 * data, UInt64 len)
{
while (len)
{
int bytes;
const UInt8 byte1 = data[0];
/* 00..7F */
if (byte1 <= 0x7F)
{
bytes = 1;
}
/* C2..DF, 80..BF */
else if (len >= 2 && byte1 >= 0xC2 && byte1 <= 0xDF && static_cast<Int8>(data[1]) <= static_cast<Int8>(0xBF))
{
bytes = 2;
}
else if (len >= 3)
{
const UInt8 byte2 = data[1];
bool byte2_ok = static_cast<Int8>(byte2) <= static_cast<Int8>(0xBF);
bool byte3_ok = static_cast<Int8>(data[2]) <= static_cast<Int8>(0xBF);
if (byte2_ok && byte3_ok &&
/* E0, A0..BF, 80..BF */
((byte1 == 0xE0 && byte2 >= 0xA0) ||
/* E1..EC, 80..BF, 80..BF */
(byte1 >= 0xE1 && byte1 <= 0xEC) ||
/* ED, 80..9F, 80..BF */
(byte1 == 0xED && byte2 <= 0x9F) ||
/* EE..EF, 80..BF, 80..BF */
(byte1 >= 0xEE && byte1 <= 0xEF)))
{
bytes = 3;
}
else if (len >= 4)
{
bool byte4_ok = static_cast<Int8>(data[3]) <= static_cast<Int8>(0xBF);
if (byte2_ok && byte3_ok && byte4_ok &&
/* F0, 90..BF, 80..BF, 80..BF */
((byte1 == 0xF0 && byte2 >= 0x90) ||
/* F1..F3, 80..BF, 80..BF, 80..BF */
(byte1 >= 0xF1 && byte1 <= 0xF3) ||
/* F4, 80..8F, 80..BF, 80..BF */
(byte1 == 0xF4 && byte2 <= 0x8F)))
{
bytes = 4;
}
else
{
return false;
}
}
else
{
return false;
}
}
else
{
return false;
}
len -= bytes;
data += bytes;
}
return true;
}
#ifndef __SSE4_1__
static inline UInt8 isValidUTF8(const UInt8 * data, UInt64 len) { return isValidUTF8Naive(data, len); }
#else
static inline UInt8 isValidUTF8(const UInt8 * data, UInt64 len)
{
/*
* Map high nibble of "First Byte" to legal character length minus 1
* 0x00 ~ 0xBF --> 0
* 0xC0 ~ 0xDF --> 1
* 0xE0 ~ 0xEF --> 2
* 0xF0 ~ 0xFF --> 3
*/
const __m128i first_len_tbl = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 3);
/* Map "First Byte" to 8-th item of range table (0xC2 ~ 0xF4) */
const __m128i first_range_tbl = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8);
/*
* Range table, map range index to min and max values
*/
const __m128i range_min_tbl
= _mm_setr_epi8(0x00, 0x80, 0x80, 0x80, 0xA0, 0x80, 0x90, 0x80, 0xC2, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F);
const __m128i range_max_tbl
= _mm_setr_epi8(0x7F, 0xBF, 0xBF, 0xBF, 0xBF, 0x9F, 0xBF, 0x8F, 0xF4, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
/*
* Tables for fast handling of four special First Bytes(E0,ED,F0,F4), after
* which the Second Byte are not 80~BF. It contains "range index adjustment".
* +------------+---------------+------------------+----------------+
* | First Byte | original range| range adjustment | adjusted range |
* +------------+---------------+------------------+----------------+
* | E0 | 2 | 2 | 4 |
* +------------+---------------+------------------+----------------+
* | ED | 2 | 3 | 5 |
* +------------+---------------+------------------+----------------+
* | F0 | 3 | 3 | 6 |
* +------------+---------------+------------------+----------------+
* | F4 | 4 | 4 | 8 |
* +------------+---------------+------------------+----------------+
*/
/* index1 -> E0, index14 -> ED */
const __m128i df_ee_tbl = _mm_setr_epi8(0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0);
/* index1 -> F0, index5 -> F4 */
const __m128i ef_fe_tbl = _mm_setr_epi8(0, 3, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
__m128i prev_input = _mm_set1_epi8(0);
__m128i prev_first_len = _mm_set1_epi8(0);
__m128i error = _mm_set1_epi8(0);
auto check_packed = [&](__m128i input) noexcept
{
/* high_nibbles = input >> 4 */
const __m128i high_nibbles = _mm_and_si128(_mm_srli_epi16(input, 4), _mm_set1_epi8(0x0F));
/* first_len = legal character length minus 1 */
/* 0 for 00~7F, 1 for C0~DF, 2 for E0~EF, 3 for F0~FF */
/* first_len = first_len_tbl[high_nibbles] */
__m128i first_len = _mm_shuffle_epi8(first_len_tbl, high_nibbles);
/* First Byte: set range index to 8 for bytes within 0xC0 ~ 0xFF */
/* range = first_range_tbl[high_nibbles] */
__m128i range = _mm_shuffle_epi8(first_range_tbl, high_nibbles);
/* Second Byte: set range index to first_len */
/* 0 for 00~7F, 1 for C0~DF, 2 for E0~EF, 3 for F0~FF */
/* range |= (first_len, prev_first_len) << 1 byte */
range = _mm_or_si128(range, _mm_alignr_epi8(first_len, prev_first_len, 15));
/* Third Byte: set range index to saturate_sub(first_len, 1) */
/* 0 for 00~7F, 0 for C0~DF, 1 for E0~EF, 2 for F0~FF */
__m128i tmp1;
__m128i tmp2;
/* tmp1 = saturate_sub(first_len, 1) */
tmp1 = _mm_subs_epu8(first_len, _mm_set1_epi8(1));
/* tmp2 = saturate_sub(prev_first_len, 1) */
tmp2 = _mm_subs_epu8(prev_first_len, _mm_set1_epi8(1));
/* range |= (tmp1, tmp2) << 2 bytes */
range = _mm_or_si128(range, _mm_alignr_epi8(tmp1, tmp2, 14));
/* Fourth Byte: set range index to saturate_sub(first_len, 2) */
/* 0 for 00~7F, 0 for C0~DF, 0 for E0~EF, 1 for F0~FF */
/* tmp1 = saturate_sub(first_len, 2) */
tmp1 = _mm_subs_epu8(first_len, _mm_set1_epi8(2));
/* tmp2 = saturate_sub(prev_first_len, 2) */
tmp2 = _mm_subs_epu8(prev_first_len, _mm_set1_epi8(2));
/* range |= (tmp1, tmp2) << 3 bytes */
range = _mm_or_si128(range, _mm_alignr_epi8(tmp1, tmp2, 13));
/*
* Now we have below range indices caluclated
* Correct cases:
* - 8 for C0~FF
* - 3 for 1st byte after F0~FF
* - 2 for 1st byte after E0~EF or 2nd byte after F0~FF
* - 1 for 1st byte after C0~DF or 2nd byte after E0~EF or
* 3rd byte after F0~FF
* - 0 for others
* Error cases:
* 9,10,11 if non ascii First Byte overlaps
* E.g., F1 80 C2 90 --> 8 3 10 2, where 10 indicates error
*/
/* Adjust Second Byte range for special First Bytes(E0,ED,F0,F4) */
/* Overlaps lead to index 9~15, which are illegal in range table */
__m128i shift1, pos, range2;
/* shift1 = (input, prev_input) << 1 byte */
shift1 = _mm_alignr_epi8(input, prev_input, 15);
pos = _mm_sub_epi8(shift1, _mm_set1_epi8(0xEF));
/*
* shift1: | EF F0 ... FE | FF 00 ... ... DE | DF E0 ... EE |
* pos: | 0 1 15 | 16 17 239| 240 241 255|
* pos-240: | 0 0 0 | 0 0 0 | 0 1 15 |
* pos+112: | 112 113 127| >= 128 | >= 128 |
*/
tmp1 = _mm_subs_epu8(pos, _mm_set1_epi8(240));
range2 = _mm_shuffle_epi8(df_ee_tbl, tmp1);
tmp2 = _mm_adds_epu8(pos, _mm_set1_epi8(112));
range2 = _mm_add_epi8(range2, _mm_shuffle_epi8(ef_fe_tbl, tmp2));
range = _mm_add_epi8(range, range2);
/* Load min and max values per calculated range index */
__m128i minv = _mm_shuffle_epi8(range_min_tbl, range);
__m128i maxv = _mm_shuffle_epi8(range_max_tbl, range);
/* Check value range */
error = _mm_or_si128(error, _mm_cmplt_epi8(input, minv));
error = _mm_or_si128(error, _mm_cmpgt_epi8(input, maxv));
prev_input = input;
prev_first_len = first_len;
data += 16;
len -= 16;
};
while (len >= 16)
check_packed(_mm_loadu_si128(reinterpret_cast<const __m128i *>(data)));
/// 0 <= len <= 15 for now. Reading data from data - 1 because of right padding of 15 and left padding
/// Then zero some bytes from the unknown memory and check again.
alignas(16) char buf[32];
_mm_store_si128(reinterpret_cast<__m128i *>(buf), _mm_loadu_si128(reinterpret_cast<const __m128i *>(data - 1)));
memset(buf + len + 1, 0, 16);
check_packed(_mm_loadu_si128(reinterpret_cast<__m128i *>(buf + 1)));
/* Reduce error vector, error_reduced = 0xFFFF if error == 0 */
return _mm_movemask_epi8(_mm_cmpeq_epi8(error, _mm_set1_epi8(0))) == 0xFFFF;
}
#endif
static constexpr bool is_fixed_to_constant = false;
static void vector(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray<UInt8> & res)
{
size_t size = offsets.size();
size_t prev_offset = 0;
for (size_t i = 0; i < size; ++i)
{
res[i] = isValidUTF8(data.data() + prev_offset, offsets[i] - 1 - prev_offset);
prev_offset = offsets[i];
}
}
static void vector_fixed_to_constant(const ColumnString::Chars & /*data*/, size_t /*n*/, UInt8 & /*res*/) {}
static void vector_fixed_to_vector(const ColumnString::Chars & data, size_t n, PaddedPODArray<UInt8> & res)
{
size_t size = data.size() / n;
for (size_t i = 0; i < size; ++i)
res[i] = isValidUTF8(data.data() + i * n, n);
}
static void array(const ColumnString::Offsets &, PaddedPODArray<UInt8> &)
{
throw Exception("Cannot apply function isValidUTF8 to Array argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
};
struct NameValidUTF8
{
static constexpr auto name = "isValidUTF8";
};
using FunctionValidUTF8 = FunctionStringOrArrayToT<ValidUTF8Impl, NameValidUTF8, UInt8>;
void registerFunctionValidUTF8(FunctionFactory & factory)
{
factory.registerFunction<FunctionValidUTF8>();
}
}

View File

@ -17,15 +17,15 @@ struct ExtractQueryStringAndFragment
res_data = data;
res_size = 0;
Pos pos = data;
Pos end = pos + size;
Pos end = data + size;
Pos pos;
if (end != (pos = find_first_symbols<'?'>(pos, end)))
if (end != (pos = find_first_symbols<'?'>(data, end)))
{
res_data = pos + (without_leading_char ? 1 : 0);
res_size = end - res_data;
}
else if (end != (pos = find_first_symbols<'#'>(pos, end)))
else if (end != (pos = find_first_symbols<'#'>(data, end)))
{
res_data = pos;
res_size = end - res_data;

View File

@ -9,6 +9,7 @@ void registerFunctionEmpty(FunctionFactory &);
void registerFunctionNotEmpty(FunctionFactory &);
void registerFunctionLength(FunctionFactory &);
void registerFunctionLengthUTF8(FunctionFactory &);
void registerFunctionValidUTF8(FunctionFactory &);
void registerFunctionLower(FunctionFactory &);
void registerFunctionUpper(FunctionFactory &);
void registerFunctionLowerUTF8(FunctionFactory &);
@ -17,7 +18,6 @@ void registerFunctionReverse(FunctionFactory &);
void registerFunctionReverseUTF8(FunctionFactory &);
void registerFunctionsConcat(FunctionFactory &);
void registerFunctionSubstring(FunctionFactory &);
void registerFunctionSubstringUTF8(FunctionFactory &);
void registerFunctionAppendTrailingCharIfAbsent(FunctionFactory &);
void registerFunctionStartsWith(FunctionFactory &);
void registerFunctionEndsWith(FunctionFactory &);
@ -36,6 +36,7 @@ void registerFunctionsString(FunctionFactory & factory)
registerFunctionNotEmpty(factory);
registerFunctionLength(factory);
registerFunctionLengthUTF8(factory);
registerFunctionValidUTF8(factory);
registerFunctionLower(factory);
registerFunctionUpper(factory);
registerFunctionLowerUTF8(factory);
@ -44,7 +45,6 @@ void registerFunctionsString(FunctionFactory & factory)
registerFunctionReverseUTF8(factory);
registerFunctionsConcat(factory);
registerFunctionSubstring(factory);
registerFunctionSubstringUTF8(factory);
registerFunctionAppendTrailingCharIfAbsent(factory);
registerFunctionStartsWith(factory);
registerFunctionEndsWith(factory);

View File

@ -28,10 +28,13 @@ namespace ErrorCodes
}
/// If 'is_utf8' - measure offset and length in code points instead of bytes.
/// UTF8 variant is not available for FixedString arguments.
template <bool is_utf8>
class FunctionSubstring : public IFunction
{
public:
static constexpr auto name = "substring";
static constexpr auto name = is_utf8 ? "substringUTF8" : "substring";
static FunctionPtr create(const Context &)
{
return std::make_shared<FunctionSubstring>();
@ -56,7 +59,7 @@ public:
+ toString(number_of_arguments) + ", should be 2 or 3",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
if (!isStringOrFixedString(arguments[0]))
if ((is_utf8 && !isString(arguments[0])) || !isStringOrFixedString(arguments[0]))
throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if (!isNumber(arguments[1]))
@ -80,7 +83,7 @@ public:
Int64 start_value, Int64 length_value, Block & block, size_t result, Source && source,
size_t input_rows_count)
{
auto col_res = ColumnString::create();
auto col_res = ColumnString::create();
if (!column_length)
{
@ -145,30 +148,48 @@ public:
throw Exception("Third argument provided for function substring could not be negative.", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
}
if (const ColumnString * col = checkAndGetColumn<ColumnString>(column_string.get()))
executeForSource(column_start, column_length, column_start_const, column_length_const, start_value,
length_value, block, result, StringSource(*col), input_rows_count);
else if (const ColumnFixedString * col_fixed = checkAndGetColumn<ColumnFixedString>(column_string.get()))
executeForSource(column_start, column_length, column_start_const, column_length_const, start_value,
length_value, block, result, FixedStringSource(*col_fixed), input_rows_count);
else if (const ColumnConst * col_const = checkAndGetColumnConst<ColumnString>(column_string.get()))
executeForSource(column_start, column_length, column_start_const, column_length_const, start_value,
length_value, block, result, ConstSource<StringSource>(*col_const), input_rows_count);
else if (const ColumnConst * col_const_fixed = checkAndGetColumnConst<ColumnFixedString>(column_string.get()))
executeForSource(column_start, column_length, column_start_const, column_length_const, start_value,
length_value, block, result, ConstSource<FixedStringSource>(*col_const_fixed), input_rows_count);
if constexpr (is_utf8)
{
if (const ColumnString * col = checkAndGetColumn<ColumnString>(column_string.get()))
executeForSource(column_start, column_length, column_start_const, column_length_const, start_value,
length_value, block, result, UTF8StringSource(*col), input_rows_count);
else if (const ColumnConst * col_const = checkAndGetColumnConst<ColumnString>(column_string.get()))
executeForSource(column_start, column_length, column_start_const, column_length_const, start_value,
length_value, block, result, ConstSource<UTF8StringSource>(*col_const), input_rows_count);
else
throw Exception(
"Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of first argument of function " + getName(),
ErrorCodes::ILLEGAL_COLUMN);
}
else
throw Exception(
"Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of first argument of function " + getName(),
ErrorCodes::ILLEGAL_COLUMN);
{
if (const ColumnString * col = checkAndGetColumn<ColumnString>(column_string.get()))
executeForSource(column_start, column_length, column_start_const, column_length_const, start_value,
length_value, block, result, StringSource(*col), input_rows_count);
else if (const ColumnFixedString * col_fixed = checkAndGetColumn<ColumnFixedString>(column_string.get()))
executeForSource(column_start, column_length, column_start_const, column_length_const, start_value,
length_value, block, result, FixedStringSource(*col_fixed), input_rows_count);
else if (const ColumnConst * col_const = checkAndGetColumnConst<ColumnString>(column_string.get()))
executeForSource(column_start, column_length, column_start_const, column_length_const, start_value,
length_value, block, result, ConstSource<StringSource>(*col_const), input_rows_count);
else if (const ColumnConst * col_const_fixed = checkAndGetColumnConst<ColumnFixedString>(column_string.get()))
executeForSource(column_start, column_length, column_start_const, column_length_const, start_value,
length_value, block, result, ConstSource<FixedStringSource>(*col_const_fixed), input_rows_count);
else
throw Exception(
"Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of first argument of function " + getName(),
ErrorCodes::ILLEGAL_COLUMN);
}
}
};
void registerFunctionSubstring(FunctionFactory & factory)
{
factory.registerFunction<FunctionSubstring>(FunctionFactory::CaseInsensitive);
factory.registerAlias("substr", FunctionSubstring::name, FunctionFactory::CaseInsensitive);
factory.registerAlias("mid", FunctionSubstring::name, FunctionFactory::CaseInsensitive); /// from MySQL dialect
factory.registerFunction<FunctionSubstring<false>>(FunctionFactory::CaseInsensitive);
factory.registerAlias("substr", "substring", FunctionFactory::CaseInsensitive);
factory.registerAlias("mid", "substring", FunctionFactory::CaseInsensitive); /// from MySQL dialect
factory.registerFunction<FunctionSubstring<true>>(FunctionFactory::CaseSensitive);
}
}

View File

@ -1,166 +0,0 @@
#include <DataTypes/DataTypeString.h>
#include <Columns/ColumnString.h>
#include <Core/ColumnNumbers.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int ARGUMENT_OUT_OF_BOUND;
}
/** If the string is encoded in UTF-8, then it selects a substring of code points in it.
* Otherwise, the behavior is undefined.
*/
struct SubstringUTF8Impl
{
static void vector(const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
size_t start,
size_t length,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
{
res_data.reserve(data.size());
size_t size = offsets.size();
res_offsets.resize(size);
ColumnString::Offset prev_offset = 0;
ColumnString::Offset res_offset = 0;
for (size_t i = 0; i < size; ++i)
{
ColumnString::Offset j = prev_offset;
ColumnString::Offset pos = 1;
ColumnString::Offset bytes_start = 0;
ColumnString::Offset bytes_length = 0;
while (j < offsets[i] - 1)
{
if (pos == start)
bytes_start = j - prev_offset + 1;
if (data[j] < 0xBF)
j += 1;
else if (data[j] < 0xE0)
j += 2;
else if (data[j] < 0xF0)
j += 3;
else
j += 1;
if (pos >= start && pos < start + length)
bytes_length = j - prev_offset + 1 - bytes_start;
else if (pos >= start + length)
break;
++pos;
}
if (bytes_start == 0)
{
res_data.resize(res_data.size() + 1);
res_data[res_offset] = 0;
++res_offset;
}
else
{
size_t bytes_to_copy = std::min(offsets[i] - prev_offset - bytes_start, bytes_length);
res_data.resize(res_data.size() + bytes_to_copy + 1);
memcpySmallAllowReadWriteOverflow15(&res_data[res_offset], &data[prev_offset + bytes_start - 1], bytes_to_copy);
res_offset += bytes_to_copy + 1;
res_data[res_offset - 1] = 0;
}
res_offsets[i] = res_offset;
prev_offset = offsets[i];
}
}
};
class FunctionSubstringUTF8 : public IFunction
{
public:
static constexpr auto name = "substringUTF8";
static FunctionPtr create(const Context &)
{
return std::make_shared<FunctionSubstringUTF8>();
}
String getName() const override
{
return name;
}
size_t getNumberOfArguments() const override
{
return 3;
}
bool useDefaultImplementationForConstants() const override { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (!isString(arguments[0]))
throw Exception(
"Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if (!isNumber(arguments[1]) || !isNumber(arguments[2]))
throw Exception("Illegal type " + (isNumber(arguments[1]) ? arguments[2]->getName() : arguments[1]->getName())
+ " of argument of function "
+ getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return std::make_shared<DataTypeString>();
}
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override
{
const ColumnPtr column_string = block.getByPosition(arguments[0]).column;
const ColumnPtr column_start = block.getByPosition(arguments[1]).column;
const ColumnPtr column_length = block.getByPosition(arguments[2]).column;
if (!column_start->isColumnConst() || !column_length->isColumnConst())
throw Exception("2nd and 3rd arguments of function " + getName() + " must be constants.", ErrorCodes::ILLEGAL_COLUMN);
Field start_field = (*block.getByPosition(arguments[1]).column)[0];
Field length_field = (*block.getByPosition(arguments[2]).column)[0];
if (start_field.getType() != Field::Types::UInt64 || length_field.getType() != Field::Types::UInt64)
throw Exception("2nd and 3rd arguments of function " + getName() + " must be non-negative and must have UInt type.", ErrorCodes::ILLEGAL_COLUMN);
UInt64 start = start_field.get<UInt64>();
UInt64 length = length_field.get<UInt64>();
if (start == 0)
throw Exception("Second argument of function substring must be greater than 0.", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
/// Otherwise may lead to overflow and pass bounds check inside inner loop.
if (start >= 0x8000000000000000ULL || length >= 0x8000000000000000ULL)
throw Exception("Too large values of 2nd or 3rd argument provided for function substring.", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
if (const ColumnString * col = checkAndGetColumn<ColumnString>(column_string.get()))
{
auto col_res = ColumnString::create();
SubstringUTF8Impl::vector(col->getChars(), col->getOffsets(), start, length, col_res->getChars(), col_res->getOffsets());
block.getByPosition(result).column = std::move(col_res);
}
else
throw Exception(
"Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of first argument of function " + getName(),
ErrorCodes::ILLEGAL_COLUMN);
}
};
void registerFunctionSubstringUTF8(FunctionFactory & factory)
{
factory.registerFunction<FunctionSubstringUTF8>();
}
}

View File

@ -7,6 +7,12 @@
namespace DB
{
namespace ErrorCodes
{
extern const int BROTLI_READ_FAILED;
}
class BrotliReadBuffer::BrotliStateWrapper
{
public:
@ -29,7 +35,7 @@ public:
BrotliReadBuffer::BrotliReadBuffer(ReadBuffer &in_, size_t buf_size, char *existing_memory, size_t alignment)
: BufferWithOwnMemory<ReadBuffer>(buf_size, existing_memory, alignment)
, in(in_)
, brotli(new BrotliStateWrapper())
, brotli(std::make_unique<BrotliStateWrapper>())
, in_available(0)
, in_data(nullptr)
, out_capacity(0)
@ -56,7 +62,7 @@ bool BrotliReadBuffer::nextImpl()
if (brotli->result == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT && (!in_available || in.eof()))
{
throw Exception(std::string("brotli decode error"), ErrorCodes::CANNOT_READ_ALL_DATA);
throw Exception("brotli decode error", ErrorCodes::BROTLI_READ_FAILED);
}
out_capacity = internal_buffer.size();
@ -76,13 +82,13 @@ bool BrotliReadBuffer::nextImpl()
}
else
{
throw Exception(std::string("brotli decode error"), ErrorCodes::CANNOT_READ_ALL_DATA);
throw Exception("brotli decode error", ErrorCodes::BROTLI_READ_FAILED);
}
}
if (brotli->result == BROTLI_DECODER_RESULT_ERROR)
{
throw Exception(std::string("brotli decode error"), ErrorCodes::CANNOT_READ_ALL_DATA);
throw Exception("brotli decode error", ErrorCodes::BROTLI_READ_FAILED);
}
return true;

View File

@ -34,5 +34,6 @@ private:
bool eof;
};
}

View File

@ -0,0 +1,126 @@
#include <Common/config.h>
#if USE_BROTLI
#include <IO/BrotliWriteBuffer.h>
#include <brotli/encode.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BROTLI_WRITE_FAILED;
}
class BrotliWriteBuffer::BrotliStateWrapper
{
public:
BrotliStateWrapper()
: state(BrotliEncoderCreateInstance(nullptr, nullptr, nullptr))
{
}
~BrotliStateWrapper()
{
BrotliEncoderDestroyInstance(state);
}
public:
BrotliEncoderState * state;
};
BrotliWriteBuffer::BrotliWriteBuffer(WriteBuffer & out_, int compression_level, size_t buf_size, char * existing_memory, size_t alignment)
: BufferWithOwnMemory<WriteBuffer>(buf_size, existing_memory, alignment)
, brotli(std::make_unique<BrotliStateWrapper>())
, in_available(0)
, in_data(nullptr)
, out_capacity(0)
, out_data(nullptr)
, out(out_)
{
BrotliEncoderSetParameter(brotli->state, BROTLI_PARAM_QUALITY, static_cast<uint32_t>(compression_level));
// Set LZ77 window size. According to brotli sources default value is 24 (c/tools/brotli.c:81)
BrotliEncoderSetParameter(brotli->state, BROTLI_PARAM_LGWIN, 24);
}
BrotliWriteBuffer::~BrotliWriteBuffer()
{
finish();
}
void BrotliWriteBuffer::nextImpl()
{
if (!offset())
{
return;
}
in_data = reinterpret_cast<unsigned char *>(working_buffer.begin());
in_available = offset();
do
{
out.nextIfAtEnd();
out_data = reinterpret_cast<unsigned char *>(out.position());
out_capacity = out.buffer().end() - out.position();
int result = BrotliEncoderCompressStream(
brotli->state,
in_available ? BROTLI_OPERATION_PROCESS : BROTLI_OPERATION_FINISH,
&in_available,
&in_data,
&out_capacity,
&out_data,
nullptr);
out.position() = out.buffer().end() - out_capacity;
if (result == 0)
{
throw Exception("brotli compress failed", ErrorCodes::BROTLI_WRITE_FAILED);
}
}
while (in_available > 0 || out_capacity == 0);
}
void BrotliWriteBuffer::finish()
{
if (finished)
return;
next();
while (true)
{
out.nextIfAtEnd();
out_data = reinterpret_cast<unsigned char *>(out.position());
out_capacity = out.buffer().end() - out.position();
int result = BrotliEncoderCompressStream(
brotli->state,
BROTLI_OPERATION_FINISH,
&in_available,
&in_data,
&out_capacity,
&out_data,
nullptr);
out.position() = out.buffer().end() - out_capacity;
if (BrotliEncoderIsFinished(brotli->state))
{
finished = true;
return;
}
if (result == 0)
{
throw Exception("brotli compress failed", ErrorCodes::BROTLI_WRITE_FAILED);
}
}
}
}
#endif

View File

@ -0,0 +1,40 @@
#pragma once
#include <IO/WriteBuffer.h>
#include <IO/BufferWithOwnMemory.h>
namespace DB
{
class BrotliWriteBuffer : public BufferWithOwnMemory<WriteBuffer>
{
public:
BrotliWriteBuffer(
WriteBuffer & out_,
int compression_level,
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
char * existing_memory = nullptr,
size_t alignment = 0);
~BrotliWriteBuffer() override;
void finish();
private:
void nextImpl() override;
class BrotliStateWrapper;
std::unique_ptr<BrotliStateWrapper> brotli;
size_t in_available;
const uint8_t * in_data;
size_t out_capacity;
uint8_t * out_data;
WriteBuffer & out;
bool finished = false;
};
}

View File

@ -24,7 +24,8 @@ namespace DB
* Differs in that is doesn't do unneeded memset. (And also tries to do as little as possible.)
* Also allows to allocate aligned piece of memory (to use with O_DIRECT, for example).
*/
struct Memory : boost::noncopyable, Allocator<false>
template <typename Allocator = Allocator<false>>
struct Memory : boost::noncopyable, Allocator
{
/// Padding is needed to allow usage of 'memcpySmallAllowReadWriteOverflow15' function with this buffer.
static constexpr size_t pad_right = 15;
@ -136,7 +137,7 @@ template <typename Base>
class BufferWithOwnMemory : public Base
{
protected:
Memory memory;
Memory<> memory;
public:
/// If non-nullptr 'existing_memory' is passed, then buffer will not create its own memory and will use existing_memory without ownership.
BufferWithOwnMemory(size_t size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, size_t alignment = 0)

View File

@ -3,7 +3,7 @@
namespace DB
{
enum class ZlibCompressionMethod
enum class CompressionMethod
{
/// DEFLATE compression with gzip header and CRC32 checksum.
/// This option corresponds to files produced by gzip(1) or HTTP Content-Encoding: gzip.
@ -11,6 +11,7 @@ enum class ZlibCompressionMethod
/// DEFLATE compression with zlib header and Adler32 checksum.
/// This option corresponds to HTTP Content-Encoding: deflate.
Zlib,
Brotli,
};
}

View File

@ -179,7 +179,7 @@ private:
*/
virtual bool nextImpl() { return false; }
void throwReadAfterEOF()
[[noreturn]] void throwReadAfterEOF()
{
throw Exception("Attempt to read after eof", ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF);
}

View File

@ -187,6 +187,9 @@ off_t ReadBufferAIO::doSeek(off_t off, int whence)
pos = working_buffer.end();
first_unread_pos_in_file = new_pos_in_file;
/// If we go back, than it's not eof
is_eof = false;
/// We can not use the result of the current asynchronous request.
skip();
}

View File

@ -43,6 +43,7 @@ protected:
ProfileCallback profile_callback;
clockid_t clock_type;
/// Children implementation should be able to seek backwards
virtual off_t doSeek(off_t off, int whence) = 0;
};

View File

@ -164,7 +164,7 @@ void readVectorBinary(std::vector<T> & v, ReadBuffer & buf, size_t MAX_VECTOR_SI
void assertString(const char * s, ReadBuffer & buf);
void assertEOF(ReadBuffer & buf);
void throwAtAssertionFailed(const char * s, ReadBuffer & buf);
[[noreturn]] void throwAtAssertionFailed(const char * s, ReadBuffer & buf);
inline void assertChar(char symbol, ReadBuffer & buf)
{

View File

@ -6,6 +6,11 @@
#include <Common/ProfileEvents.h>
#include <IO/BufferWithOwnMemory.h>
#include <Common/config.h>
#if USE_LFALLOC
#include <Common/LFAllocator.h>
#endif
namespace ProfileEvents
{
@ -20,8 +25,13 @@ namespace DB
struct UncompressedCacheCell
{
Memory data;
#if USE_LFALLOC
Memory<LFAllocator> data;
#else
Memory<> data;
#endif
size_t compressed_size;
UInt32 additional_bytes;
};
struct UncompressedSizeWeightFunction

View File

@ -113,7 +113,7 @@ readVarUInt(T & x, ReadBuffer & istr)
}
inline void throwReadAfterEOF()
[[noreturn]] inline void throwReadAfterEOF()
{
throw Exception("Attempt to read after eof", ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF);
}

View File

@ -76,34 +76,47 @@ void WriteBufferFromHTTPServerResponse::nextImpl()
{
if (compress)
{
if (compression_method == ZlibCompressionMethod::Gzip)
if (compression_method == CompressionMethod::Gzip)
{
#if defined(POCO_CLICKHOUSE_PATCH)
*response_header_ostr << "Content-Encoding: gzip\r\n";
#else
response.set("Content-Encoding", "gzip");
response_body_ostr = &(response.send());
#endif
out_raw.emplace(*response_body_ostr);
deflating_buf.emplace(*out_raw, compression_method, compression_level, working_buffer.size(), working_buffer.begin());
out = &*deflating_buf;
}
else if (compression_method == ZlibCompressionMethod::Zlib)
else if (compression_method == CompressionMethod::Zlib)
{
#if defined(POCO_CLICKHOUSE_PATCH)
*response_header_ostr << "Content-Encoding: deflate\r\n";
#else
response.set("Content-Encoding", "deflate");
response_body_ostr = &(response.send());
#endif
out_raw.emplace(*response_body_ostr);
deflating_buf.emplace(*out_raw, compression_method, compression_level, working_buffer.size(), working_buffer.begin());
out = &*deflating_buf;
}
else if (compression_method == CompressionMethod::Brotli)
{
#if defined(POCO_CLICKHOUSE_PATCH)
*response_header_ostr << "Content-Encoding: br\r\n";
#else
response.set("Content-Encoding", "br");
response_body_ostr = &(response.send());
#endif
out_raw.emplace(*response_body_ostr);
brotli_buf.emplace(*out_raw, compression_level, working_buffer.size(), working_buffer.begin());
out = &*brotli_buf;
}
else
throw Exception("Logical error: unknown compression method passed to WriteBufferFromHTTPServerResponse",
ErrorCodes::LOGICAL_ERROR);
/// Use memory allocated for the outer buffer in the buffer pointed to by out. This avoids extra allocation and copy.
#if !defined(POCO_CLICKHOUSE_PATCH)
response_body_ostr = &(response.send());
#endif
out_raw.emplace(*response_body_ostr);
deflating_buf.emplace(*out_raw, compression_method, compression_level, working_buffer.size(), working_buffer.begin());
out = &*deflating_buf;
}
else
{
@ -133,7 +146,7 @@ WriteBufferFromHTTPServerResponse::WriteBufferFromHTTPServerResponse(
Poco::Net::HTTPServerResponse & response_,
unsigned keep_alive_timeout_,
bool compress_,
ZlibCompressionMethod compression_method_,
CompressionMethod compression_method_,
size_t size)
: BufferWithOwnMemory<WriteBuffer>(size)
, request(request_)

View File

@ -9,6 +9,7 @@
#include <IO/BufferWithOwnMemory.h>
#include <IO/WriteBufferFromOStream.h>
#include <IO/ZlibDeflatingWriteBuffer.h>
#include <IO/BrotliWriteBuffer.h>
#include <IO/HTTPCommon.h>
#include <IO/Progress.h>
#include <Common/NetException.h>
@ -49,7 +50,7 @@ private:
bool add_cors_header = false;
unsigned keep_alive_timeout = 0;
bool compress = false;
ZlibCompressionMethod compression_method;
CompressionMethod compression_method;
int compression_level = Z_DEFAULT_COMPRESSION;
std::ostream * response_body_ostr = nullptr;
@ -60,6 +61,7 @@ private:
std::optional<WriteBufferFromOStream> out_raw;
std::optional<ZlibDeflatingWriteBuffer> deflating_buf;
std::optional<BrotliWriteBuffer> brotli_buf;
WriteBuffer * out = nullptr; /// Uncompressed HTTP body is written to this buffer. Points to out_raw or possibly to deflating_buf.
@ -89,7 +91,7 @@ public:
Poco::Net::HTTPServerResponse & response_,
unsigned keep_alive_timeout_,
bool compress_ = false, /// If true - set Content-Encoding header and compress the result.
ZlibCompressionMethod compression_method_ = ZlibCompressionMethod::Gzip,
CompressionMethod compression_method_ = CompressionMethod::Gzip,
size_t size = DBMS_DEFAULT_BUFFER_SIZE);
/// Writes progess in repeating HTTP headers.

View File

@ -6,7 +6,7 @@ namespace DB
ZlibDeflatingWriteBuffer::ZlibDeflatingWriteBuffer(
WriteBuffer & out_,
ZlibCompressionMethod compression_method,
CompressionMethod compression_method,
int compression_level,
size_t buf_size,
char * existing_memory,
@ -23,7 +23,7 @@ ZlibDeflatingWriteBuffer::ZlibDeflatingWriteBuffer(
zstr.avail_out = 0;
int window_bits = 15;
if (compression_method == ZlibCompressionMethod::Gzip)
if (compression_method == CompressionMethod::Gzip)
{
window_bits += 16;
}

View File

@ -2,7 +2,7 @@
#include <IO/WriteBuffer.h>
#include <IO/BufferWithOwnMemory.h>
#include <IO/ZlibCompressionMethod.h>
#include <IO/CompressionMethod.h>
#include <zlib.h>
@ -21,7 +21,7 @@ class ZlibDeflatingWriteBuffer : public BufferWithOwnMemory<WriteBuffer>
public:
ZlibDeflatingWriteBuffer(
WriteBuffer & out_,
ZlibCompressionMethod compression_method,
CompressionMethod compression_method,
int compression_level,
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
char * existing_memory = nullptr,

View File

@ -6,7 +6,7 @@ namespace DB
ZlibInflatingReadBuffer::ZlibInflatingReadBuffer(
ReadBuffer & in_,
ZlibCompressionMethod compression_method,
CompressionMethod compression_method,
size_t buf_size,
char * existing_memory,
size_t alignment)
@ -23,7 +23,7 @@ ZlibInflatingReadBuffer::ZlibInflatingReadBuffer(
zstr.avail_out = 0;
int window_bits = 15;
if (compression_method == ZlibCompressionMethod::Gzip)
if (compression_method == CompressionMethod::Gzip)
{
window_bits += 16;
}

View File

@ -2,7 +2,7 @@
#include <IO/ReadBuffer.h>
#include <IO/BufferWithOwnMemory.h>
#include <IO/ZlibCompressionMethod.h>
#include <IO/CompressionMethod.h>
#include <zlib.h>
@ -22,7 +22,7 @@ class ZlibInflatingReadBuffer : public BufferWithOwnMemory<ReadBuffer>
public:
ZlibInflatingReadBuffer(
ReadBuffer & in_,
ZlibCompressionMethod compression_method,
CompressionMethod compression_method,
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
char * existing_memory = nullptr,
size_t alignment = 0);

View File

@ -0,0 +1,71 @@
#pragma GCC diagnostic ignored "-Wsign-compare"
#ifdef __clang__
#pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
#pragma clang diagnostic ignored "-Wundef"
#endif
#include <gtest/gtest.h>
#include <Core/Defines.h>
#include <port/unistd.h>
#include <IO/ReadBufferAIO.h>
#include <fstream>
namespace
{
std::string createTmpFileForEOFtest()
{
char pattern[] = "/tmp/fileXXXXXX";
char * dir = ::mkdtemp(pattern);
return std::string(dir) + "/foo";
}
void prepare_for_eof(std::string & filename, std::string & buf)
{
static const std::string symbols = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
filename = createTmpFileForEOFtest();
size_t n = 10 * DEFAULT_AIO_FILE_BLOCK_SIZE;
buf.reserve(n);
for (size_t i = 0; i < n; ++i)
buf += symbols[i % symbols.length()];
std::ofstream out(filename.c_str());
out << buf;
}
}
TEST(ReadBufferAIOTest, TestReadAfterAIO)
{
using namespace DB;
std::string data;
std::string file_path;
prepare_for_eof(file_path, data);
ReadBufferAIO testbuf(file_path);
std::string newdata;
newdata.resize(data.length());
size_t total_read = testbuf.read(newdata.data(), newdata.length());
EXPECT_EQ(total_read, data.length());
EXPECT_TRUE(testbuf.eof());
testbuf.seek(data.length() - 100);
std::string smalldata;
smalldata.resize(100);
size_t read_after_eof = testbuf.read(smalldata.data(), smalldata.size());
EXPECT_EQ(read_after_eof, 100);
EXPECT_TRUE(testbuf.eof());
testbuf.seek(0);
std::string repeatdata;
repeatdata.resize(data.length());
size_t read_after_eof_big = testbuf.read(repeatdata.data(), repeatdata.size());
EXPECT_EQ(read_after_eof_big, data.length());
EXPECT_TRUE(testbuf.eof());
}

View File

@ -23,7 +23,7 @@ try
{
DB::WriteBufferFromFile buf("test_zlib_buffers.gz", DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_CREAT | O_TRUNC);
DB::ZlibDeflatingWriteBuffer deflating_buf(buf, DB::ZlibCompressionMethod::Gzip, /* compression_level = */ 3);
DB::ZlibDeflatingWriteBuffer deflating_buf(buf, DB::CompressionMethod::Gzip, /* compression_level = */ 3);
stopwatch.restart();
for (size_t i = 0; i < n; ++i)
@ -41,7 +41,7 @@ try
{
DB::ReadBufferFromFile buf("test_zlib_buffers.gz");
DB::ZlibInflatingReadBuffer inflating_buf(buf, DB::ZlibCompressionMethod::Gzip);
DB::ZlibInflatingReadBuffer inflating_buf(buf, DB::CompressionMethod::Gzip);
stopwatch.restart();
for (size_t i = 0; i < n; ++i)

View File

@ -71,8 +71,8 @@ private:
void visit(ASTSelectQuery & select, ASTPtr &) const
{
if (select.tables)
tryVisit<ASTTablesInSelectQuery>(select.tables);
if (select.tables())
tryVisit<ASTTablesInSelectQuery>(select.refTables());
visitChildren(select);
}

Some files were not shown because too many files have changed in this diff Show More