Merge branch 'master' into kssenii-rabbit-mq

alesapin 2020-06-24 20:14:28 +03:00
commit 3fc65b3269
1744 changed files with 40920 additions and 16580 deletions


@@ -289,8 +289,9 @@ set (CMAKE_POSTFIX_VARIABLE "CMAKE_${CMAKE_BUILD_TYPE_UC}_POSTFIX")
 if (MAKE_STATIC_LIBRARIES)
     set (CMAKE_POSITION_INDEPENDENT_CODE OFF)
-    if (OS_LINUX)
+    if (OS_LINUX AND NOT ARCH_ARM)
         # Slightly more efficient code can be generated
+        # It's disabled for ARM because otherwise ClickHouse cannot run on Android.
         set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -fno-pie")
         set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -fno-pie")
         set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-no-pie")


@@ -13,9 +13,3 @@ ClickHouse is an open-source column-oriented database management system that all
 * [Yandex.Messenger channel](https://yandex.ru/chat/#/join/20e380d9-c7be-4123-ab06-e95fb946975e) shares announcements and useful links in Russian.
 * [Contacts](https://clickhouse.tech/#contacts) can help to get your questions answered if there are any.
 * You can also [fill this form](https://clickhouse.tech/#meet) to meet Yandex ClickHouse team in person.
-## Upcoming Events
-* [ClickHouse Online Meetup (in Russian)](https://events.yandex.ru/events/click-house-onlajn-vs-18-06-2020) on June 18, 2020.
-* [ClickHouse Workshop in Novosibirsk](https://2020.codefest.ru/lecture/1628) on TBD date.
-* [Yandex C++ Open-Source Sprints in Moscow](https://events.yandex.ru/events/otkrytyj-kod-v-yandek-28-03-2020) on TBD date.


@@ -75,7 +75,7 @@ std::string determineDefaultTimeZone()
     try
     {
-        tz_database_path = fs::canonical(tz_database_path);
+        tz_database_path = fs::weakly_canonical(tz_database_path);
         /// The tzdata file exists. If it is inside the tz_database_dir,
         /// then the relative path is the time zone id.
@@ -91,7 +91,7 @@ std::string determineDefaultTimeZone()
     if (!tz_file_path.is_absolute())
         tz_file_path = tz_database_path / tz_file_path;
-    tz_file_path = fs::canonical(tz_file_path);
+    tz_file_path = fs::weakly_canonical(tz_file_path);
     fs::path relative_path = tz_file_path.lexically_relative(tz_database_path);
     if (!relative_path.empty() && *relative_path.begin() != ".." && *relative_path.begin() != ".")
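For context on the change above: std::filesystem::canonical requires the whole path to exist and throws otherwise, while std::filesystem::weakly_canonical canonicalizes the longest existing prefix and appends the remaining elements. A minimal standalone sketch of the difference (not part of the patch; the path is hypothetical):

// --- sketch (not part of the patch) ---
#include <filesystem>
#include <iostream>

namespace fs = std::filesystem;

int main()
{
    /// Hypothetical path: only the /usr/share/zoneinfo prefix is expected to exist.
    const fs::path p = "/usr/share/zoneinfo/../zoneinfo/Does/Not/Exist";

    /// weakly_canonical normalizes the existing prefix and keeps the rest as-is.
    std::cout << fs::weakly_canonical(p) << '\n';

    /// canonical throws for a path that does not fully exist.
    try
    {
        std::cout << fs::canonical(p) << '\n';
    }
    catch (const fs::filesystem_error & e)
    {
        std::cout << "canonical failed: " << e.what() << '\n';
    }
}
// --- end of sketch ---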


@@ -38,6 +38,7 @@
 #include <common/argsToConfig.h>
 #include <common/getThreadId.h>
 #include <common/coverage.h>
+#include <common/sleep.h>
 #include <IO/WriteBufferFromFile.h>
 #include <IO/WriteBufferFromFileDescriptorDiscardOnFailure.h>
@@ -50,6 +51,7 @@
 #include <Common/getMultipleKeysFromConfig.h>
 #include <Common/ClickHouseRevision.h>
 #include <Common/Config/ConfigProcessor.h>
+#include <Common/SymbolIndex.h>
 #if !defined(ARCADIA_BUILD)
 #   include <Common/config_version.h>
@@ -83,7 +85,8 @@ static const size_t signal_pipe_buf_size =
     + sizeof(ucontext_t)
     + sizeof(StackTrace)
     + sizeof(UInt32)
-    + max_query_id_size + 1;    /// query_id + varint encoded length
+    + max_query_id_size + 1    /// query_id + varint encoded length
+    + sizeof(void*);
 using signal_function = void(int, siginfo_t*, void*);
@@ -133,13 +136,14 @@ static void signalHandler(int sig, siginfo_t * info, void * context)
     DB::writePODBinary(stack_trace, out);
     DB::writeBinary(UInt32(getThreadId()), out);
     DB::writeStringBinary(query_id, out);
+    DB::writePODBinary(DB::current_thread, out);
     out.next();
     if (sig != SIGTSTP)    /// This signal is used for debugging.
     {
         /// The time that is usually enough for separate thread to print info into log.
-        ::sleep(10);
+        sleepForSeconds(10);
         call_default_signal_handler(sig);
     }
@@ -216,16 +220,18 @@ public:
             StackTrace stack_trace(NoCapture{});
             UInt32 thread_num;
             std::string query_id;
+            DB::ThreadStatus * thread_ptr{};
             DB::readPODBinary(info, in);
             DB::readPODBinary(context, in);
             DB::readPODBinary(stack_trace, in);
             DB::readBinary(thread_num, in);
             DB::readBinary(query_id, in);
+            DB::readPODBinary(thread_ptr, in);
             /// This allows to receive more signals if failure happens inside onFault function.
             /// Example: segfault while symbolizing stack trace.
-            std::thread([=, this] { onFault(sig, info, context, stack_trace, thread_num, query_id); }).detach();
+            std::thread([=, this] { onFault(sig, info, context, stack_trace, thread_num, query_id, thread_ptr); }).detach();
         }
     }
 }
@@ -236,7 +242,8 @@ private:
     void onTerminate(const std::string & message, UInt32 thread_num) const
     {
-        LOG_FATAL(log, "(version {}{}) (from thread {}) {}", VERSION_STRING, VERSION_OFFICIAL, thread_num, message);
+        LOG_FATAL(log, "(version {}{}, {}) (from thread {}) {}",
+            VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info, thread_num, message);
     }
     void onFault(
@@ -245,21 +252,30 @@ private:
         const ucontext_t & context,
         const StackTrace & stack_trace,
         UInt32 thread_num,
-        const std::string & query_id) const
+        const std::string & query_id,
+        DB::ThreadStatus * thread_ptr) const
     {
+        DB::ThreadStatus thread_status;
+        /// Send logs from this thread to client if possible.
+        /// It will allow client to see failure messages directly.
+        if (thread_ptr)
+        {
+            if (auto logs_queue = thread_ptr->getInternalTextLogsQueue())
+                DB::CurrentThread::attachInternalTextLogsQueue(logs_queue, DB::LogsLevel::trace);
+        }
         LOG_FATAL(log, "########################################");
-        {
-            std::stringstream message;
-            message << "(version " << VERSION_STRING << VERSION_OFFICIAL << ")";
-            message << " (from thread " << thread_num << ")";
-            if (query_id.empty())
-                message << " (no query)";
-            else
-                message << " (query_id: " << query_id << ")";
-            message << " Received signal " << strsignal(sig) << " (" << sig << ").";
-            LOG_FATAL(log, message.str());
-        }
+        if (query_id.empty())
+        {
+            LOG_FATAL(log, "(version {}{}, {}) (from thread {}) (no query) Received signal {} ({})",
+                VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info, thread_num, strsignal(sig), sig);
+        }
+        else
+        {
+            LOG_FATAL(log, "(version {}{}, {}) (from thread {}) (query_id: {}) Received signal {} ({})",
+                VERSION_STRING, VERSION_OFFICIAL, daemon.build_id_info, thread_num, query_id, strsignal(sig), sig);
+        }
         LOG_FATAL(log, signalToErrorMessage(sig, info, context));
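The rewritten block above drops the std::stringstream that was previously passed to LOG_FATAL via message.str() and instead passes a format string with {} placeholders directly. A rough standalone sketch of the same idea using the fmt library (LOG_FATAL itself is a ClickHouse logging macro, so this only approximates it):

// --- sketch (not part of the patch) ---
#include <fmt/format.h>
#include <iostream>
#include <sstream>
#include <string>

int main()
{
    const std::string version = "20.6.1.1";
    const unsigned thread_num = 42;
    const int sig = 11;

    /// Old style: accumulate pieces in a stream, then log the resulting string.
    std::stringstream message;
    message << "(version " << version << ")";
    message << " (from thread " << thread_num << ")";
    message << " Received signal (" << sig << ").";
    std::cout << message.str() << '\n';

    /// New style: a single format string with {} placeholders.
    std::cout << fmt::format("(version {}) (from thread {}) Received signal ({}).",
        version, thread_num, sig) << '\n';
}
// --- end of sketch ---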
@@ -279,6 +295,10 @@ private:
         /// Write symbolized stack trace line by line for better grep-ability.
         stack_trace.toStringEveryLine([&](const std::string & s) { LOG_FATAL(log, s); });
+        /// When everything is done, we will try to send these error messages to client.
+        if (thread_ptr)
+            thread_ptr->onFatalError();
     }
 };
@@ -292,17 +312,15 @@ static void sanitizerDeathCallback()
     StringRef query_id = DB::CurrentThread::getQueryId();   /// This is signal safe.
-    {
-        std::stringstream message;
-        message << "(version " << VERSION_STRING << VERSION_OFFICIAL << ")";
-        message << " (from thread " << getThreadId() << ")";
-        if (query_id.size == 0)
-            message << " (no query)";
-        else
-            message << " (query_id: " << query_id << ")";
-        message << " Sanitizer trap.";
-        LOG_FATAL(log, message.str());
-    }
+    if (query_id.size == 0)
+    {
+        LOG_FATAL(log, "(version {}{}) (from thread {}) (no query) Sanitizer trap.",
+            VERSION_STRING, VERSION_OFFICIAL, getThreadId());
+    }
+    else
+    {
+        LOG_FATAL(log, "(version {}{}) (from thread {}) (query_id: {}) Sanitizer trap.",
+            VERSION_STRING, VERSION_OFFICIAL, getThreadId(), query_id);
+    }
     /// Just in case print our own stack trace. In case when llvm-symbolizer does not work.
@@ -711,12 +729,23 @@ void BaseDaemon::initializeTerminationAndSignalProcessing()
     signal_listener = std::make_unique<SignalListener>(*this);
     signal_listener_thread.start(*signal_listener);
+#if defined(__ELF__) && !defined(__FreeBSD__)
+    String build_id_hex = DB::SymbolIndex::instance().getBuildIDHex();
+    if (build_id_hex.empty())
+        build_id_info = "no build id";
+    else
+        build_id_info = "build id: " + build_id_hex;
+#else
+    build_id_info = "no build id";
+#endif
 }
 void BaseDaemon::logRevision() const
 {
     Poco::Logger::root().information("Starting " + std::string{VERSION_FULL}
         + " with revision " + std::to_string(ClickHouseRevision::get())
+        + ", " + build_id_info
         + ", PID " + std::to_string(getpid()));
 }


@@ -198,6 +198,8 @@ protected:
     std::string config_path;
     DB::ConfigProcessor::LoadedConfig loaded_config;
     Poco::Util::AbstractConfiguration * last_configuration = nullptr;
+    String build_id_info;
 };


@@ -1,9 +1,9 @@
 # This strings autochanged from release_lib.sh:
-SET(VERSION_REVISION 54435)
+SET(VERSION_REVISION 54436)
 SET(VERSION_MAJOR 20)
-SET(VERSION_MINOR 5)
+SET(VERSION_MINOR 6)
 SET(VERSION_PATCH 1)
-SET(VERSION_GITHASH 91df18a906dcffdbee6816e5389df6c65f86e35f)
-SET(VERSION_DESCRIBE v20.5.1.1-prestable)
-SET(VERSION_STRING 20.5.1.1)
+SET(VERSION_GITHASH efc57fb063b3fb4df968d916720ec4d4ced4642e)
+SET(VERSION_DESCRIBE v20.6.1.1-prestable)
+SET(VERSION_STRING 20.6.1.1)
 # end of autochange


@@ -18,7 +18,7 @@ message(STATUS "Default libraries: ${DEFAULT_LIBS}")
 set(CMAKE_CXX_STANDARD_LIBRARIES ${DEFAULT_LIBS})
 set(CMAKE_C_STANDARD_LIBRARIES ${DEFAULT_LIBS})
-# glibc-compatibility library relies to fixed version of libc headers
+# glibc-compatibility library relies to constant version of libc headers
 # (because minor changes in function attributes between different glibc versions will introduce incompatibilities)
 # This is for x86_64. For other architectures we have separate toolchains.
 if (ARCH_AMD64 AND NOT_UNBUNDLED)


@@ -97,5 +97,37 @@ if (USE_INTERNAL_BOOST_LIBRARY)
     add_library (boost::system ALIAS _boost_system)
     target_include_directories (_boost_system PRIVATE ${LIBRARY_DIR})
 else ()
-    message (FATAL_ERROR "TODO: external Boost library is not supported!")
+    # 1.70 like in contrib/boost
+    # 1.67 on CI
+    set(BOOST_VERSION 1.67)
+    find_package(Boost ${BOOST_VERSION} COMPONENTS
+        system
+        filesystem
+        iostreams
+        program_options
+        regex
+        REQUIRED)
+    add_library (_boost_headers_only INTERFACE)
+    add_library (boost::headers_only ALIAS _boost_headers_only)
+    target_include_directories (_boost_headers_only SYSTEM BEFORE INTERFACE ${Boost_INCLUDE_DIR})
+    add_library (_boost_filesystem INTERFACE)
+    add_library (_boost_iostreams INTERFACE)
+    add_library (_boost_program_options INTERFACE)
+    add_library (_boost_regex INTERFACE)
+    add_library (_boost_system INTERFACE)
+    target_link_libraries (_boost_filesystem INTERFACE ${Boost_FILESYSTEM_LIBRARY})
+    target_link_libraries (_boost_iostreams INTERFACE ${Boost_IOSTREAMS_LIBRARY})
+    target_link_libraries (_boost_program_options INTERFACE ${Boost_PROGRAM_OPTIONS_LIBRARY})
+    target_link_libraries (_boost_regex INTERFACE ${Boost_REGEX_LIBRARY})
+    target_link_libraries (_boost_system INTERFACE ${Boost_SYSTEM_LIBRARY})
+    add_library (boost::filesystem ALIAS _boost_filesystem)
+    add_library (boost::iostreams ALIAS _boost_iostreams)
+    add_library (boost::program_options ALIAS _boost_program_options)
+    add_library (boost::regex ALIAS _boost_regex)
+    add_library (boost::system ALIAS _boost_system)
 endif ()

contrib/hyperscan vendored

@@ -1 +1 @@
-Subproject commit 3058c9c20cba3accdf92544d8513a26240c4ff70
+Subproject commit 3907fd00ee8b2538739768fa9533f8635a276531


@@ -219,7 +219,9 @@ if (ENABLE_HYPERSCAN)
     target_compile_definitions (hyperscan PUBLIC USE_HYPERSCAN=1)
     target_compile_options (hyperscan
-        PRIVATE -g0 -march=corei7 # library has too much debug information
+        PRIVATE -g0 # Library has too much debug information
+                -march=corei7 -O2 -fno-strict-aliasing -fno-omit-frame-pointer -fvisibility=hidden # The options from original build system
+                -fno-sanitize=undefined # Assume the library takes care of itself
     )
     target_include_directories (hyperscan
         PRIVATE


@@ -1,14 +1,19 @@
 option (ENABLE_JEMALLOC "Enable jemalloc allocator" ${ENABLE_LIBRARIES})
-if (SANITIZE OR NOT OS_LINUX OR NOT (ARCH_AMD64 OR ARCH_ARM))
+if (SANITIZE OR NOT (ARCH_AMD64 OR ARCH_ARM) OR NOT (OS_LINUX OR OS_FREEBSD))
     set (ENABLE_JEMALLOC OFF)
-    message (STATUS "jemalloc is disabled implicitly: it doesn't work with sanitizers and can only be used on Linux with x86_64 or aarch64.")
+    message (STATUS "jemalloc is disabled implicitly: it doesn't work with sanitizers and can only be used with x86_64 or aarch64 on linux or freebsd.")
 endif ()
 if (ENABLE_JEMALLOC)
+    if (NOT OS_LINUX)
+        message (WARNING "jemalloc support on non-linux is EXPERIMENTAL")
+    endif()
     option (USE_INTERNAL_JEMALLOC "Use internal jemalloc library" ${NOT_UNBUNDLED})
     if (USE_INTERNAL_JEMALLOC)
+        if (OS_LINUX)
         # ThreadPool select job randomly, and there can be some threads that had been
         # performed some memory heavy task before and will be inactive for some time,
         # but until it will became active again, the memory will not be freed since by
@@ -18,6 +23,9 @@ if (ENABLE_JEMALLOC)
         # By enabling percpu_arena number of arenas limited to number of CPUs and hence
         # this problem should go away.
         set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0")
+        else()
+            set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0")
+        endif()
         # CACHE variable is empty, to allow changing defaults without necessity
         # to purge cache
         set (JEMALLOC_CONFIG_MALLOC_CONF_OVERRIDE "" CACHE STRING "Change default configuration string of JEMalloc" )
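The JEMALLOC_CONFIG_MALLOC_CONF string above bakes default option values (percpu_arena, oversize_threshold) into the jemalloc build. The effective values can be read back at runtime through jemalloc's mallctl interface; a hedged sketch follows (header path, option availability and a possible je_ symbol prefix all depend on how jemalloc is configured and on its version):

// --- sketch (not part of the patch) ---
#include <jemalloc/jemalloc.h>
#include <cstdio>

int main()
{
    /// Read back the options configured by the baked-in malloc_conf string.
    size_t oversize_threshold = 0;
    size_t len = sizeof(oversize_threshold);
    if (mallctl("opt.oversize_threshold", &oversize_threshold, &len, nullptr, 0) == 0)
        std::printf("opt.oversize_threshold = %zu\n", oversize_threshold);

    const char * percpu_arena = nullptr;
    len = sizeof(percpu_arena);
    if (mallctl("opt.percpu_arena", &percpu_arena, &len, nullptr, 0) == 0)
        std::printf("opt.percpu_arena = %s\n", percpu_arena);
}
// --- end of sketch ---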
@@ -71,14 +79,26 @@ if (ENABLE_JEMALLOC)
         target_include_directories(jemalloc PRIVATE ${LIBRARY_DIR}/include)
         target_include_directories(jemalloc SYSTEM PUBLIC include)
-        set(JEMALLOC_INCLUDE)
-        if (ARCH_AMD64)
-            set(JEMALLOC_INCLUDE_PREFIX include_linux_x86_64)
-        elseif (ARCH_ARM)
-            set(JEMALLOC_INCLUDE_PREFIX include_linux_aarch64)
+        set (JEMALLOC_INCLUDE_PREFIX)
+        # OS_
+        if (OS_LINUX)
+            set (JEMALLOC_INCLUDE_PREFIX "include_linux")
+        elseif (OS_FREEBSD)
+            set (JEMALLOC_INCLUDE_PREFIX "include_freebsd")
+        elseif (OS_DARWIN)
+            set (JEMALLOC_INCLUDE_PREFIX "include_darwin")
+        else ()
+            message (FATAL_ERROR "This OS is not supported")
         endif ()
-        target_include_directories(jemalloc SYSTEM PUBLIC
-            ${JEMALLOC_INCLUDE_PREFIX})
+        # ARCH_
+        if (ARCH_AMD64)
+            set(JEMALLOC_INCLUDE_PREFIX "${JEMALLOC_INCLUDE_PREFIX}_x86_64")
+        elseif (ARCH_ARM)
+            set(JEMALLOC_INCLUDE_PREFIX "${JEMALLOC_INCLUDE_PREFIX}_aarch64")
+        else ()
+            message (FATAL_ERROR "This arch is not supported")
+        endif ()
         configure_file(${JEMALLOC_INCLUDE_PREFIX}/jemalloc/internal/jemalloc_internal_defs.h.in
             ${JEMALLOC_INCLUDE_PREFIX}/jemalloc/internal/jemalloc_internal_defs.h)
         target_include_directories(jemalloc SYSTEM PRIVATE


@@ -1,3 +1,13 @@
+// OSX does not have this for system alloc functions, so you will get
+// "exception specification in declaration" error.
+#if defined(__APPLE__) || defined(__FreeBSD__)
+# undef JEMALLOC_NOTHROW
+# define JEMALLOC_NOTHROW
+
+# undef JEMALLOC_CXX_THROW
+# define JEMALLOC_CXX_THROW
+#endif
+
 /*
  * The je_ prefix on the following public symbol declarations is an artifact
  * of namespace management, and should be omitted in application code unless
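For reference, the "exception specification in declaration" error mentioned in the comment above is what a compiler reports when a function is redeclared with an exception specification that differs from an earlier declaration; defining JEMALLOC_NOTHROW and JEMALLOC_CXX_THROW to nothing sidesteps such a mismatch against the system allocator declarations. A tiny illustration with a hypothetical function (not from the library):

// --- sketch (not part of the patch) ---
#include <cstddef>
#include <new>

void * my_alloc(std::size_t size);              /// first declaration: potentially throwing
// void * my_alloc(std::size_t size) noexcept;  /// error: exception specification in declaration
                                                /// does not match the previous declaration

void * my_alloc(std::size_t size)               /// definition must match the first declaration
{
    return ::operator new(size);
}

int main()
{
    void * p = my_alloc(16);
    ::operator delete(p);
}
// --- end of sketch ---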


@@ -0,0 +1,372 @@
/* include/jemalloc/internal/jemalloc_internal_defs.h. Generated from jemalloc_internal_defs.h.in by configure. */
#ifndef JEMALLOC_INTERNAL_DEFS_H_
#define JEMALLOC_INTERNAL_DEFS_H_
/*
* If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all
* public APIs to be prefixed. This makes it possible, with some care, to use
* multiple allocators simultaneously.
*/
#define JEMALLOC_PREFIX "je_"
#define JEMALLOC_CPREFIX "JE_"
/*
* Define overrides for non-standard allocator-related functions if they are
* present on the system.
*/
/* #undef JEMALLOC_OVERRIDE___LIBC_CALLOC */
/* #undef JEMALLOC_OVERRIDE___LIBC_FREE */
/* #undef JEMALLOC_OVERRIDE___LIBC_MALLOC */
/* #undef JEMALLOC_OVERRIDE___LIBC_MEMALIGN */
/* #undef JEMALLOC_OVERRIDE___LIBC_REALLOC */
/* #undef JEMALLOC_OVERRIDE___LIBC_VALLOC */
/* #undef JEMALLOC_OVERRIDE___POSIX_MEMALIGN */
/*
* JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs.
* For shared libraries, symbol visibility mechanisms prevent these symbols
* from being exported, but for static libraries, naming collisions are a real
* possibility.
*/
#define JEMALLOC_PRIVATE_NAMESPACE je_
/*
* Hyper-threaded CPUs may need a special instruction inside spin loops in
* order to yield to another virtual CPU.
*/
#define CPU_SPINWAIT
/* 1 if CPU_SPINWAIT is defined, 0 otherwise. */
#define HAVE_CPU_SPINWAIT 0
/*
* Number of significant bits in virtual addresses. This may be less than the
* total number of bits in a pointer, e.g. on x64, for which the uppermost 16
* bits are the same as bit 47.
*/
#define LG_VADDR 48
/* Defined if C11 atomics are available. */
#define JEMALLOC_C11_ATOMICS 1
/* Defined if GCC __atomic atomics are available. */
#define JEMALLOC_GCC_ATOMIC_ATOMICS 1
/* and the 8-bit variant support. */
#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS 1
/* Defined if GCC __sync atomics are available. */
#define JEMALLOC_GCC_SYNC_ATOMICS 1
/* and the 8-bit variant support. */
#define JEMALLOC_GCC_U8_SYNC_ATOMICS 1
/*
* Defined if __builtin_clz() and __builtin_clzl() are available.
*/
#define JEMALLOC_HAVE_BUILTIN_CLZ
/*
* Defined if os_unfair_lock_*() functions are available, as provided by Darwin.
*/
#define JEMALLOC_OS_UNFAIR_LOCK
/* Defined if syscall(2) is usable. */
/* #undef JEMALLOC_USE_SYSCALL */
/*
* Defined if secure_getenv(3) is available.
*/
/* #undef JEMALLOC_HAVE_SECURE_GETENV */
/*
* Defined if issetugid(2) is available.
*/
#define JEMALLOC_HAVE_ISSETUGID
/* Defined if pthread_atfork(3) is available. */
#define JEMALLOC_HAVE_PTHREAD_ATFORK
/* Defined if pthread_setname_np(3) is available. */
/* #undef JEMALLOC_HAVE_PTHREAD_SETNAME_NP */
/*
* Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available.
*/
/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE */
/*
* Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available.
*/
/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC */
/*
* Defined if mach_absolute_time() is available.
*/
#define JEMALLOC_HAVE_MACH_ABSOLUTE_TIME 1
/*
* Defined if clock_gettime(CLOCK_REALTIME, ...) is available.
*/
#define JEMALLOC_HAVE_CLOCK_REALTIME 1
/*
* Defined if _malloc_thread_cleanup() exists. At least in the case of
* FreeBSD, pthread_key_create() allocates, which if used during malloc
* bootstrapping will cause recursion into the pthreads library. Therefore, if
* _malloc_thread_cleanup() exists, use it as the basis for thread cleanup in
* malloc_tsd.
*/
/* #undef JEMALLOC_MALLOC_THREAD_CLEANUP */
/*
* Defined if threaded initialization is known to be safe on this platform.
* Among other things, it must be possible to initialize a mutex without
* triggering allocation in order for threaded allocation to be safe.
*/
/* #undef JEMALLOC_THREADED_INIT */
/*
* Defined if the pthreads implementation defines
* _pthread_mutex_init_calloc_cb(), in which case the function is used in order
* to avoid recursive allocation during mutex initialization.
*/
/* #undef JEMALLOC_MUTEX_INIT_CB */
/* Non-empty if the tls_model attribute is supported. */
#define JEMALLOC_TLS_MODEL __attribute__((tls_model("initial-exec")))
/*
* JEMALLOC_DEBUG enables assertions and other sanity checks, and disables
* inline functions.
*/
/* #undef JEMALLOC_DEBUG */
/* JEMALLOC_STATS enables statistics calculation. */
#define JEMALLOC_STATS
/* JEMALLOC_EXPERIMENTAL_SMALLOCX_API enables experimental smallocx API. */
/* #undef JEMALLOC_EXPERIMENTAL_SMALLOCX_API */
/* JEMALLOC_PROF enables allocation profiling. */
/* #undef JEMALLOC_PROF */
/* Use libunwind for profile backtracing if defined. */
/* #undef JEMALLOC_PROF_LIBUNWIND */
/* Use libgcc for profile backtracing if defined. */
/* #undef JEMALLOC_PROF_LIBGCC */
/* Use gcc intrinsics for profile backtracing if defined. */
/* #undef JEMALLOC_PROF_GCC */
/*
* JEMALLOC_DSS enables use of sbrk(2) to allocate extents from the data storage
* segment (DSS).
*/
/* #undef JEMALLOC_DSS */
/* Support memory filling (junk/zero). */
#define JEMALLOC_FILL
/* Support utrace(2)-based tracing. */
/* #undef JEMALLOC_UTRACE */
/* Support optional abort() on OOM. */
/* #undef JEMALLOC_XMALLOC */
/* Support lazy locking (avoid locking unless a second thread is launched). */
/* #undef JEMALLOC_LAZY_LOCK */
/*
* Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size
* classes).
*/
/* #undef LG_QUANTUM */
/* One page is 2^LG_PAGE bytes. */
#define LG_PAGE 16
/*
* One huge page is 2^LG_HUGEPAGE bytes. Note that this is defined even if the
* system does not explicitly support huge pages; system calls that require
* explicit huge page support are separately configured.
*/
#define LG_HUGEPAGE 29
/*
* If defined, adjacent virtual memory mappings with identical attributes
* automatically coalesce, and they fragment when changes are made to subranges.
* This is the normal order of things for mmap()/munmap(), but on Windows
* VirtualAlloc()/VirtualFree() operations must be precisely matched, i.e.
* mappings do *not* coalesce/fragment.
*/
#define JEMALLOC_MAPS_COALESCE
/*
* If defined, retain memory for later reuse by default rather than using e.g.
* munmap() to unmap freed extents. This is enabled on 64-bit Linux because
* common sequences of mmap()/munmap() calls will cause virtual memory map
* holes.
*/
/* #undef JEMALLOC_RETAIN */
/* TLS is used to map arenas and magazine caches to threads. */
/* #undef JEMALLOC_TLS */
/*
* Used to mark unreachable code to quiet "end of non-void" compiler warnings.
* Don't use this directly; instead use unreachable() from util.h
*/
#define JEMALLOC_INTERNAL_UNREACHABLE __builtin_unreachable
/*
* ffs*() functions to use for bitmapping. Don't use these directly; instead,
* use ffs_*() from util.h.
*/
#define JEMALLOC_INTERNAL_FFSLL __builtin_ffsll
#define JEMALLOC_INTERNAL_FFSL __builtin_ffsl
#define JEMALLOC_INTERNAL_FFS __builtin_ffs
/*
* popcount*() functions to use for bitmapping.
*/
#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl
#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount
/*
* If defined, explicitly attempt to more uniformly distribute large allocation
* pointer alignments across all cache indices.
*/
#define JEMALLOC_CACHE_OBLIVIOUS
/*
* If defined, enable logging facilities. We make this a configure option to
* avoid taking extra branches everywhere.
*/
/* #undef JEMALLOC_LOG */
/*
* If defined, use readlinkat() (instead of readlink()) to follow
* /etc/malloc_conf.
*/
/* #undef JEMALLOC_READLINKAT */
/*
* Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings.
*/
#define JEMALLOC_ZONE
/*
* Methods for determining whether the OS overcommits.
* JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY: Linux's
* /proc/sys/vm.overcommit_memory file.
* JEMALLOC_SYSCTL_VM_OVERCOMMIT: FreeBSD's vm.overcommit sysctl.
*/
/* #undef JEMALLOC_SYSCTL_VM_OVERCOMMIT */
/* #undef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY */
/* Defined if madvise(2) is available. */
#define JEMALLOC_HAVE_MADVISE
/*
* Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE
* arguments to madvise(2).
*/
/* #undef JEMALLOC_HAVE_MADVISE_HUGE */
/*
* Methods for purging unused pages differ between operating systems.
*
* madvise(..., MADV_FREE) : This marks pages as being unused, such that they
* will be discarded rather than swapped out.
* madvise(..., MADV_DONTNEED) : If JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS is
* defined, this immediately discards pages,
* such that new pages will be demand-zeroed if
* the address region is later touched;
* otherwise this behaves similarly to
* MADV_FREE, though typically with higher
* system overhead.
*/
#define JEMALLOC_PURGE_MADVISE_FREE
#define JEMALLOC_PURGE_MADVISE_DONTNEED
/* #undef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS */
/* Defined if madvise(2) is available but MADV_FREE is not (x86 Linux only). */
/* #undef JEMALLOC_DEFINE_MADVISE_FREE */
/*
* Defined if MADV_DO[NT]DUMP is supported as an argument to madvise.
*/
/* #undef JEMALLOC_MADVISE_DONTDUMP */
/*
* Defined if transparent huge pages (THPs) are supported via the
* MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled.
*/
/* #undef JEMALLOC_THP */
/* Define if operating system has alloca.h header. */
/* #undef JEMALLOC_HAS_ALLOCA_H */
/* C99 restrict keyword supported. */
#define JEMALLOC_HAS_RESTRICT 1
/* For use by hash code. */
/* #undef JEMALLOC_BIG_ENDIAN */
/* sizeof(int) == 2^LG_SIZEOF_INT. */
#define LG_SIZEOF_INT 2
/* sizeof(long) == 2^LG_SIZEOF_LONG. */
#define LG_SIZEOF_LONG 3
/* sizeof(long long) == 2^LG_SIZEOF_LONG_LONG. */
#define LG_SIZEOF_LONG_LONG 3
/* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */
#define LG_SIZEOF_INTMAX_T 3
/* glibc malloc hooks (__malloc_hook, __realloc_hook, __free_hook). */
/* #undef JEMALLOC_GLIBC_MALLOC_HOOK */
/* glibc memalign hook. */
/* #undef JEMALLOC_GLIBC_MEMALIGN_HOOK */
/* pthread support */
#define JEMALLOC_HAVE_PTHREAD
/* dlsym() support */
#define JEMALLOC_HAVE_DLSYM
/* Adaptive mutex support in pthreads. */
/* #undef JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP */
/* GNU specific sched_getcpu support */
/* #undef JEMALLOC_HAVE_SCHED_GETCPU */
/* GNU specific sched_setaffinity support */
/* #undef JEMALLOC_HAVE_SCHED_SETAFFINITY */
/*
* If defined, all the features necessary for background threads are present.
*/
/* #undef JEMALLOC_BACKGROUND_THREAD */
/*
* If defined, jemalloc symbols are not exported (doesn't work when
* JEMALLOC_PREFIX is not defined).
*/
/* #undef JEMALLOC_EXPORT */
/* config.malloc_conf options string. */
#define JEMALLOC_CONFIG_MALLOC_CONF "@JEMALLOC_CONFIG_MALLOC_CONF@"
/* If defined, jemalloc takes the malloc/free/etc. symbol names. */
/* #undef JEMALLOC_IS_MALLOC */
/*
* Defined if strerror_r returns char * if _GNU_SOURCE is defined.
*/
/* #undef JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE */
/* Performs additional safety checks when defined. */
/* #undef JEMALLOC_OPT_SAFETY_CHECKS */
#endif /* JEMALLOC_INTERNAL_DEFS_H_ */


@@ -0,0 +1,372 @@
/* include/jemalloc/internal/jemalloc_internal_defs.h. Generated from jemalloc_internal_defs.h.in by configure. */
#ifndef JEMALLOC_INTERNAL_DEFS_H_
#define JEMALLOC_INTERNAL_DEFS_H_
/*
* If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all
* public APIs to be prefixed. This makes it possible, with some care, to use
* multiple allocators simultaneously.
*/
#define JEMALLOC_PREFIX "je_"
#define JEMALLOC_CPREFIX "JE_"
/*
* Define overrides for non-standard allocator-related functions if they are
* present on the system.
*/
/* #undef JEMALLOC_OVERRIDE___LIBC_CALLOC */
/* #undef JEMALLOC_OVERRIDE___LIBC_FREE */
/* #undef JEMALLOC_OVERRIDE___LIBC_MALLOC */
/* #undef JEMALLOC_OVERRIDE___LIBC_MEMALIGN */
/* #undef JEMALLOC_OVERRIDE___LIBC_REALLOC */
/* #undef JEMALLOC_OVERRIDE___LIBC_VALLOC */
/* #undef JEMALLOC_OVERRIDE___POSIX_MEMALIGN */
/*
* JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs.
* For shared libraries, symbol visibility mechanisms prevent these symbols
* from being exported, but for static libraries, naming collisions are a real
* possibility.
*/
#define JEMALLOC_PRIVATE_NAMESPACE je_
/*
* Hyper-threaded CPUs may need a special instruction inside spin loops in
* order to yield to another virtual CPU.
*/
#define CPU_SPINWAIT __asm__ volatile("pause")
/* 1 if CPU_SPINWAIT is defined, 0 otherwise. */
#define HAVE_CPU_SPINWAIT 1
/*
* Number of significant bits in virtual addresses. This may be less than the
* total number of bits in a pointer, e.g. on x64, for which the uppermost 16
* bits are the same as bit 47.
*/
#define LG_VADDR 48
/* Defined if C11 atomics are available. */
#define JEMALLOC_C11_ATOMICS 1
/* Defined if GCC __atomic atomics are available. */
#define JEMALLOC_GCC_ATOMIC_ATOMICS 1
/* and the 8-bit variant support. */
#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS 1
/* Defined if GCC __sync atomics are available. */
#define JEMALLOC_GCC_SYNC_ATOMICS 1
/* and the 8-bit variant support. */
#define JEMALLOC_GCC_U8_SYNC_ATOMICS 1
/*
* Defined if __builtin_clz() and __builtin_clzl() are available.
*/
#define JEMALLOC_HAVE_BUILTIN_CLZ
/*
* Defined if os_unfair_lock_*() functions are available, as provided by Darwin.
*/
#define JEMALLOC_OS_UNFAIR_LOCK
/* Defined if syscall(2) is usable. */
/* #undef JEMALLOC_USE_SYSCALL */
/*
* Defined if secure_getenv(3) is available.
*/
/* #undef JEMALLOC_HAVE_SECURE_GETENV */
/*
* Defined if issetugid(2) is available.
*/
#define JEMALLOC_HAVE_ISSETUGID
/* Defined if pthread_atfork(3) is available. */
#define JEMALLOC_HAVE_PTHREAD_ATFORK
/* Defined if pthread_setname_np(3) is available. */
/* #undef JEMALLOC_HAVE_PTHREAD_SETNAME_NP */
/*
* Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available.
*/
/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE */
/*
* Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available.
*/
/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC */
/*
* Defined if mach_absolute_time() is available.
*/
#define JEMALLOC_HAVE_MACH_ABSOLUTE_TIME 1
/*
* Defined if clock_gettime(CLOCK_REALTIME, ...) is available.
*/
#define JEMALLOC_HAVE_CLOCK_REALTIME 1
/*
* Defined if _malloc_thread_cleanup() exists. At least in the case of
* FreeBSD, pthread_key_create() allocates, which if used during malloc
* bootstrapping will cause recursion into the pthreads library. Therefore, if
* _malloc_thread_cleanup() exists, use it as the basis for thread cleanup in
* malloc_tsd.
*/
/* #undef JEMALLOC_MALLOC_THREAD_CLEANUP */
/*
* Defined if threaded initialization is known to be safe on this platform.
* Among other things, it must be possible to initialize a mutex without
* triggering allocation in order for threaded allocation to be safe.
*/
/* #undef JEMALLOC_THREADED_INIT */
/*
* Defined if the pthreads implementation defines
* _pthread_mutex_init_calloc_cb(), in which case the function is used in order
* to avoid recursive allocation during mutex initialization.
*/
/* #undef JEMALLOC_MUTEX_INIT_CB */
/* Non-empty if the tls_model attribute is supported. */
#define JEMALLOC_TLS_MODEL __attribute__((tls_model("initial-exec")))
/*
* JEMALLOC_DEBUG enables assertions and other sanity checks, and disables
* inline functions.
*/
/* #undef JEMALLOC_DEBUG */
/* JEMALLOC_STATS enables statistics calculation. */
#define JEMALLOC_STATS
/* JEMALLOC_EXPERIMENTAL_SMALLOCX_API enables experimental smallocx API. */
/* #undef JEMALLOC_EXPERIMENTAL_SMALLOCX_API */
/* JEMALLOC_PROF enables allocation profiling. */
/* #undef JEMALLOC_PROF */
/* Use libunwind for profile backtracing if defined. */
/* #undef JEMALLOC_PROF_LIBUNWIND */
/* Use libgcc for profile backtracing if defined. */
/* #undef JEMALLOC_PROF_LIBGCC */
/* Use gcc intrinsics for profile backtracing if defined. */
/* #undef JEMALLOC_PROF_GCC */
/*
* JEMALLOC_DSS enables use of sbrk(2) to allocate extents from the data storage
* segment (DSS).
*/
/* #undef JEMALLOC_DSS */
/* Support memory filling (junk/zero). */
#define JEMALLOC_FILL
/* Support utrace(2)-based tracing. */
/* #undef JEMALLOC_UTRACE */
/* Support optional abort() on OOM. */
/* #undef JEMALLOC_XMALLOC */
/* Support lazy locking (avoid locking unless a second thread is launched). */
/* #undef JEMALLOC_LAZY_LOCK */
/*
* Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size
* classes).
*/
/* #undef LG_QUANTUM */
/* One page is 2^LG_PAGE bytes. */
#define LG_PAGE 12
/*
* One huge page is 2^LG_HUGEPAGE bytes. Note that this is defined even if the
* system does not explicitly support huge pages; system calls that require
* explicit huge page support are separately configured.
*/
#define LG_HUGEPAGE 21
/*
* If defined, adjacent virtual memory mappings with identical attributes
* automatically coalesce, and they fragment when changes are made to subranges.
* This is the normal order of things for mmap()/munmap(), but on Windows
* VirtualAlloc()/VirtualFree() operations must be precisely matched, i.e.
* mappings do *not* coalesce/fragment.
*/
#define JEMALLOC_MAPS_COALESCE
/*
* If defined, retain memory for later reuse by default rather than using e.g.
* munmap() to unmap freed extents. This is enabled on 64-bit Linux because
* common sequences of mmap()/munmap() calls will cause virtual memory map
* holes.
*/
/* #undef JEMALLOC_RETAIN */
/* TLS is used to map arenas and magazine caches to threads. */
/* #undef JEMALLOC_TLS */
/*
* Used to mark unreachable code to quiet "end of non-void" compiler warnings.
* Don't use this directly; instead use unreachable() from util.h
*/
#define JEMALLOC_INTERNAL_UNREACHABLE __builtin_unreachable
/*
* ffs*() functions to use for bitmapping. Don't use these directly; instead,
* use ffs_*() from util.h.
*/
#define JEMALLOC_INTERNAL_FFSLL __builtin_ffsll
#define JEMALLOC_INTERNAL_FFSL __builtin_ffsl
#define JEMALLOC_INTERNAL_FFS __builtin_ffs
/*
* popcount*() functions to use for bitmapping.
*/
#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl
#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount
/*
* If defined, explicitly attempt to more uniformly distribute large allocation
* pointer alignments across all cache indices.
*/
#define JEMALLOC_CACHE_OBLIVIOUS
/*
* If defined, enable logging facilities. We make this a configure option to
* avoid taking extra branches everywhere.
*/
/* #undef JEMALLOC_LOG */
/*
* If defined, use readlinkat() (instead of readlink()) to follow
* /etc/malloc_conf.
*/
/* #undef JEMALLOC_READLINKAT */
/*
* Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings.
*/
#define JEMALLOC_ZONE
/*
* Methods for determining whether the OS overcommits.
* JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY: Linux's
* /proc/sys/vm.overcommit_memory file.
* JEMALLOC_SYSCTL_VM_OVERCOMMIT: FreeBSD's vm.overcommit sysctl.
*/
/* #undef JEMALLOC_SYSCTL_VM_OVERCOMMIT */
/* #undef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY */
/* Defined if madvise(2) is available. */
#define JEMALLOC_HAVE_MADVISE
/*
* Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE
* arguments to madvise(2).
*/
/* #undef JEMALLOC_HAVE_MADVISE_HUGE */
/*
* Methods for purging unused pages differ between operating systems.
*
* madvise(..., MADV_FREE) : This marks pages as being unused, such that they
* will be discarded rather than swapped out.
* madvise(..., MADV_DONTNEED) : If JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS is
* defined, this immediately discards pages,
* such that new pages will be demand-zeroed if
* the address region is later touched;
* otherwise this behaves similarly to
* MADV_FREE, though typically with higher
* system overhead.
*/
#define JEMALLOC_PURGE_MADVISE_FREE
#define JEMALLOC_PURGE_MADVISE_DONTNEED
/* #undef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS */
/* Defined if madvise(2) is available but MADV_FREE is not (x86 Linux only). */
/* #undef JEMALLOC_DEFINE_MADVISE_FREE */
/*
* Defined if MADV_DO[NT]DUMP is supported as an argument to madvise.
*/
/* #undef JEMALLOC_MADVISE_DONTDUMP */
/*
* Defined if transparent huge pages (THPs) are supported via the
* MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled.
*/
/* #undef JEMALLOC_THP */
/* Define if operating system has alloca.h header. */
/* #undef JEMALLOC_HAS_ALLOCA_H */
/* C99 restrict keyword supported. */
#define JEMALLOC_HAS_RESTRICT 1
/* For use by hash code. */
/* #undef JEMALLOC_BIG_ENDIAN */
/* sizeof(int) == 2^LG_SIZEOF_INT. */
#define LG_SIZEOF_INT 2
/* sizeof(long) == 2^LG_SIZEOF_LONG. */
#define LG_SIZEOF_LONG 3
/* sizeof(long long) == 2^LG_SIZEOF_LONG_LONG. */
#define LG_SIZEOF_LONG_LONG 3
/* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */
#define LG_SIZEOF_INTMAX_T 3
/* glibc malloc hooks (__malloc_hook, __realloc_hook, __free_hook). */
/* #undef JEMALLOC_GLIBC_MALLOC_HOOK */
/* glibc memalign hook. */
/* #undef JEMALLOC_GLIBC_MEMALIGN_HOOK */
/* pthread support */
#define JEMALLOC_HAVE_PTHREAD
/* dlsym() support */
#define JEMALLOC_HAVE_DLSYM
/* Adaptive mutex support in pthreads. */
/* #undef JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP */
/* GNU specific sched_getcpu support */
/* #undef JEMALLOC_HAVE_SCHED_GETCPU */
/* GNU specific sched_setaffinity support */
/* #undef JEMALLOC_HAVE_SCHED_SETAFFINITY */
/*
* If defined, all the features necessary for background threads are present.
*/
/* #undef JEMALLOC_BACKGROUND_THREAD */
/*
* If defined, jemalloc symbols are not exported (doesn't work when
* JEMALLOC_PREFIX is not defined).
*/
/* #undef JEMALLOC_EXPORT */
/* config.malloc_conf options string. */
#define JEMALLOC_CONFIG_MALLOC_CONF "@JEMALLOC_CONFIG_MALLOC_CONF@"
/* If defined, jemalloc takes the malloc/free/etc. symbol names. */
/* #undef JEMALLOC_IS_MALLOC */
/*
* Defined if strerror_r returns char * if _GNU_SOURCE is defined.
*/
/* #undef JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE */
/* Performs additional safety checks when defined. */
/* #undef JEMALLOC_OPT_SAFETY_CHECKS */
#endif /* JEMALLOC_INTERNAL_DEFS_H_ */


@@ -0,0 +1,373 @@
/* include/jemalloc/internal/jemalloc_internal_defs.h. Generated from jemalloc_internal_defs.h.in by configure. */
#ifndef JEMALLOC_INTERNAL_DEFS_H_
#define JEMALLOC_INTERNAL_DEFS_H_
/*
* If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all
* public APIs to be prefixed. This makes it possible, with some care, to use
* multiple allocators simultaneously.
*/
/* #undef JEMALLOC_PREFIX */
/* #undef JEMALLOC_CPREFIX */
/*
* Define overrides for non-standard allocator-related functions if they are
* present on the system.
*/
/* #undef JEMALLOC_OVERRIDE___LIBC_CALLOC */
/* #undef JEMALLOC_OVERRIDE___LIBC_FREE */
/* #undef JEMALLOC_OVERRIDE___LIBC_MALLOC */
/* #undef JEMALLOC_OVERRIDE___LIBC_MEMALIGN */
/* #undef JEMALLOC_OVERRIDE___LIBC_REALLOC */
/* #undef JEMALLOC_OVERRIDE___LIBC_VALLOC */
#define JEMALLOC_OVERRIDE___POSIX_MEMALIGN
/*
* JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs.
* For shared libraries, symbol visibility mechanisms prevent these symbols
* from being exported, but for static libraries, naming collisions are a real
* possibility.
*/
#define JEMALLOC_PRIVATE_NAMESPACE je_
/*
* Hyper-threaded CPUs may need a special instruction inside spin loops in
* order to yield to another virtual CPU.
*/
#define CPU_SPINWAIT
/* 1 if CPU_SPINWAIT is defined, 0 otherwise. */
#define HAVE_CPU_SPINWAIT 0
/*
* Number of significant bits in virtual addresses. This may be less than the
* total number of bits in a pointer, e.g. on x64, for which the uppermost 16
* bits are the same as bit 47.
*/
#define LG_VADDR 48
/* Defined if C11 atomics are available. */
#define JEMALLOC_C11_ATOMICS 1
/* Defined if GCC __atomic atomics are available. */
#define JEMALLOC_GCC_ATOMIC_ATOMICS 1
/* and the 8-bit variant support. */
#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS 1
/* Defined if GCC __sync atomics are available. */
#define JEMALLOC_GCC_SYNC_ATOMICS 1
/* and the 8-bit variant support. */
#define JEMALLOC_GCC_U8_SYNC_ATOMICS 1
/*
* Defined if __builtin_clz() and __builtin_clzl() are available.
*/
#define JEMALLOC_HAVE_BUILTIN_CLZ
/*
* Defined if os_unfair_lock_*() functions are available, as provided by Darwin.
*/
/* #undef JEMALLOC_OS_UNFAIR_LOCK */
/* Defined if syscall(2) is usable. */
#define JEMALLOC_USE_SYSCALL
/*
* Defined if secure_getenv(3) is available.
*/
/* #undef JEMALLOC_HAVE_SECURE_GETENV */
/*
* Defined if issetugid(2) is available.
*/
#define JEMALLOC_HAVE_ISSETUGID
/* Defined if pthread_atfork(3) is available. */
#define JEMALLOC_HAVE_PTHREAD_ATFORK
/* Defined if pthread_setname_np(3) is available. */
// Only since 12.1-STABLE
// #define JEMALLOC_HAVE_PTHREAD_SETNAME_NP
/*
* Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available.
*/
/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE */
/*
* Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available.
*/
#define JEMALLOC_HAVE_CLOCK_MONOTONIC 1
/*
* Defined if mach_absolute_time() is available.
*/
/* #undef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME */
/*
* Defined if clock_gettime(CLOCK_REALTIME, ...) is available.
*/
#define JEMALLOC_HAVE_CLOCK_REALTIME 1
/*
* Defined if _malloc_thread_cleanup() exists. At least in the case of
* FreeBSD, pthread_key_create() allocates, which if used during malloc
* bootstrapping will cause recursion into the pthreads library. Therefore, if
* _malloc_thread_cleanup() exists, use it as the basis for thread cleanup in
* malloc_tsd.
*/
#define JEMALLOC_MALLOC_THREAD_CLEANUP
/*
* Defined if threaded initialization is known to be safe on this platform.
* Among other things, it must be possible to initialize a mutex without
* triggering allocation in order for threaded allocation to be safe.
*/
/* #undef JEMALLOC_THREADED_INIT */
/*
* Defined if the pthreads implementation defines
* _pthread_mutex_init_calloc_cb(), in which case the function is used in order
* to avoid recursive allocation during mutex initialization.
*/
#define JEMALLOC_MUTEX_INIT_CB 1
/* Non-empty if the tls_model attribute is supported. */
#define JEMALLOC_TLS_MODEL __attribute__((tls_model("initial-exec")))
/*
* JEMALLOC_DEBUG enables assertions and other sanity checks, and disables
* inline functions.
*/
/* #undef JEMALLOC_DEBUG */
/* JEMALLOC_STATS enables statistics calculation. */
#define JEMALLOC_STATS
/* JEMALLOC_EXPERIMENTAL_SMALLOCX_API enables experimental smallocx API. */
/* #undef JEMALLOC_EXPERIMENTAL_SMALLOCX_API */
/* JEMALLOC_PROF enables allocation profiling. */
/* #undef JEMALLOC_PROF */
/* Use libunwind for profile backtracing if defined. */
/* #undef JEMALLOC_PROF_LIBUNWIND */
/* Use libgcc for profile backtracing if defined. */
/* #undef JEMALLOC_PROF_LIBGCC */
/* Use gcc intrinsics for profile backtracing if defined. */
/* #undef JEMALLOC_PROF_GCC */
/*
* JEMALLOC_DSS enables use of sbrk(2) to allocate extents from the data storage
* segment (DSS).
*/
#define JEMALLOC_DSS
/* Support memory filling (junk/zero). */
#define JEMALLOC_FILL
/* Support utrace(2)-based tracing. */
/* #undef JEMALLOC_UTRACE */
/* Support optional abort() on OOM. */
/* #undef JEMALLOC_XMALLOC */
/* Support lazy locking (avoid locking unless a second thread is launched). */
#define JEMALLOC_LAZY_LOCK
/*
* Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size
* classes).
*/
/* #undef LG_QUANTUM */
/* One page is 2^LG_PAGE bytes. */
#define LG_PAGE 16
/*
* One huge page is 2^LG_HUGEPAGE bytes. Note that this is defined even if the
* system does not explicitly support huge pages; system calls that require
* explicit huge page support are separately configured.
*/
#define LG_HUGEPAGE 29
/*
* If defined, adjacent virtual memory mappings with identical attributes
* automatically coalesce, and they fragment when changes are made to subranges.
* This is the normal order of things for mmap()/munmap(), but on Windows
* VirtualAlloc()/VirtualFree() operations must be precisely matched, i.e.
* mappings do *not* coalesce/fragment.
*/
#define JEMALLOC_MAPS_COALESCE
/*
* If defined, retain memory for later reuse by default rather than using e.g.
* munmap() to unmap freed extents. This is enabled on 64-bit Linux because
* common sequences of mmap()/munmap() calls will cause virtual memory map
* holes.
*/
/* #undef JEMALLOC_RETAIN */
/* TLS is used to map arenas and magazine caches to threads. */
#define JEMALLOC_TLS
/*
* Used to mark unreachable code to quiet "end of non-void" compiler warnings.
* Don't use this directly; instead use unreachable() from util.h
*/
#define JEMALLOC_INTERNAL_UNREACHABLE __builtin_unreachable
/*
* ffs*() functions to use for bitmapping. Don't use these directly; instead,
* use ffs_*() from util.h.
*/
#define JEMALLOC_INTERNAL_FFSLL __builtin_ffsll
#define JEMALLOC_INTERNAL_FFSL __builtin_ffsl
#define JEMALLOC_INTERNAL_FFS __builtin_ffs
/*
* popcount*() functions to use for bitmapping.
*/
#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl
#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount
/*
* If defined, explicitly attempt to more uniformly distribute large allocation
* pointer alignments across all cache indices.
*/
#define JEMALLOC_CACHE_OBLIVIOUS
/*
* If defined, enable logging facilities. We make this a configure option to
* avoid taking extra branches everywhere.
*/
/* #undef JEMALLOC_LOG */
/*
* If defined, use readlinkat() (instead of readlink()) to follow
* /etc/malloc_conf.
*/
/* #undef JEMALLOC_READLINKAT */
/*
* Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings.
*/
/* #undef JEMALLOC_ZONE */
/*
* Methods for determining whether the OS overcommits.
* JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY: Linux's
* /proc/sys/vm.overcommit_memory file.
* JEMALLOC_SYSCTL_VM_OVERCOMMIT: FreeBSD's vm.overcommit sysctl.
*/
#define JEMALLOC_SYSCTL_VM_OVERCOMMIT
/* #undef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY */
/* Defined if madvise(2) is available. */
#define JEMALLOC_HAVE_MADVISE
/*
* Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE
* arguments to madvise(2).
*/
/* #undef JEMALLOC_HAVE_MADVISE_HUGE */
/*
* Methods for purging unused pages differ between operating systems.
*
* madvise(..., MADV_FREE) : This marks pages as being unused, such that they
* will be discarded rather than swapped out.
* madvise(..., MADV_DONTNEED) : If JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS is
* defined, this immediately discards pages,
* such that new pages will be demand-zeroed if
* the address region is later touched;
* otherwise this behaves similarly to
* MADV_FREE, though typically with higher
* system overhead.
*/
#define JEMALLOC_PURGE_MADVISE_FREE
#define JEMALLOC_PURGE_MADVISE_DONTNEED
/* #undef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS */
/* Defined if madvise(2) is available but MADV_FREE is not (x86 Linux only). */
/* #undef JEMALLOC_DEFINE_MADVISE_FREE */
/*
* Defined if MADV_DO[NT]DUMP is supported as an argument to madvise.
*/
/* #undef JEMALLOC_MADVISE_DONTDUMP */
/*
* Defined if transparent huge pages (THPs) are supported via the
* MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled.
*/
/* #undef JEMALLOC_THP */
/* Define if operating system has alloca.h header. */
/* #undef JEMALLOC_HAS_ALLOCA_H */
/* C99 restrict keyword supported. */
#define JEMALLOC_HAS_RESTRICT 1
/* For use by hash code. */
/* #undef JEMALLOC_BIG_ENDIAN */
/* sizeof(int) == 2^LG_SIZEOF_INT. */
#define LG_SIZEOF_INT 2
/* sizeof(long) == 2^LG_SIZEOF_LONG. */
#define LG_SIZEOF_LONG 3
/* sizeof(long long) == 2^LG_SIZEOF_LONG_LONG. */
#define LG_SIZEOF_LONG_LONG 3
/* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */
#define LG_SIZEOF_INTMAX_T 3
/* glibc malloc hooks (__malloc_hook, __realloc_hook, __free_hook). */
/* #undef JEMALLOC_GLIBC_MALLOC_HOOK */
/* glibc memalign hook. */
/* #undef JEMALLOC_GLIBC_MEMALIGN_HOOK */
/* pthread support */
#define JEMALLOC_HAVE_PTHREAD
/* dlsym() support */
#define JEMALLOC_HAVE_DLSYM
/* Adaptive mutex support in pthreads. */
#define JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP
/* GNU specific sched_getcpu support */
/* #undef JEMALLOC_HAVE_SCHED_GETCPU */
/* GNU specific sched_setaffinity support */
/* #undef JEMALLOC_HAVE_SCHED_SETAFFINITY */
/*
* If defined, all the features necessary for background threads are present.
*/
#define JEMALLOC_BACKGROUND_THREAD 1
/*
* If defined, jemalloc symbols are not exported (doesn't work when
* JEMALLOC_PREFIX is not defined).
*/
/* #undef JEMALLOC_EXPORT */
/* config.malloc_conf options string. */
#define JEMALLOC_CONFIG_MALLOC_CONF "@JEMALLOC_CONFIG_MALLOC_CONF@"
/* If defined, jemalloc takes the malloc/free/etc. symbol names. */
#define JEMALLOC_IS_MALLOC 1
/*
* Defined if strerror_r returns char * if _GNU_SOURCE is defined.
*/
/* #undef JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE */
/* Performs additional safety checks when defined. */
/* #undef JEMALLOC_OPT_SAFETY_CHECKS */
#endif /* JEMALLOC_INTERNAL_DEFS_H_ */


@@ -0,0 +1,373 @@
/* include/jemalloc/internal/jemalloc_internal_defs.h. Generated from jemalloc_internal_defs.h.in by configure. */
#ifndef JEMALLOC_INTERNAL_DEFS_H_
#define JEMALLOC_INTERNAL_DEFS_H_
/*
* If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all
* public APIs to be prefixed. This makes it possible, with some care, to use
* multiple allocators simultaneously.
*/
/* #undef JEMALLOC_PREFIX */
/* #undef JEMALLOC_CPREFIX */
/*
* Define overrides for non-standard allocator-related functions if they are
* present on the system.
*/
/* #undef JEMALLOC_OVERRIDE___LIBC_CALLOC */
/* #undef JEMALLOC_OVERRIDE___LIBC_FREE */
/* #undef JEMALLOC_OVERRIDE___LIBC_MALLOC */
/* #undef JEMALLOC_OVERRIDE___LIBC_MEMALIGN */
/* #undef JEMALLOC_OVERRIDE___LIBC_REALLOC */
/* #undef JEMALLOC_OVERRIDE___LIBC_VALLOC */
#define JEMALLOC_OVERRIDE___POSIX_MEMALIGN
/*
* JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs.
* For shared libraries, symbol visibility mechanisms prevent these symbols
* from being exported, but for static libraries, naming collisions are a real
* possibility.
*/
#define JEMALLOC_PRIVATE_NAMESPACE je_
/*
* Hyper-threaded CPUs may need a special instruction inside spin loops in
* order to yield to another virtual CPU.
*/
#define CPU_SPINWAIT __asm__ volatile("pause")
/* 1 if CPU_SPINWAIT is defined, 0 otherwise. */
#define HAVE_CPU_SPINWAIT 1
/*
* Number of significant bits in virtual addresses. This may be less than the
* total number of bits in a pointer, e.g. on x64, for which the uppermost 16
* bits are the same as bit 47.
*/
#define LG_VADDR 48
/* Defined if C11 atomics are available. */
#define JEMALLOC_C11_ATOMICS 1
/* Defined if GCC __atomic atomics are available. */
#define JEMALLOC_GCC_ATOMIC_ATOMICS 1
/* and the 8-bit variant support. */
#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS 1
/* Defined if GCC __sync atomics are available. */
#define JEMALLOC_GCC_SYNC_ATOMICS 1
/* and the 8-bit variant support. */
#define JEMALLOC_GCC_U8_SYNC_ATOMICS 1
/*
* Defined if __builtin_clz() and __builtin_clzl() are available.
*/
#define JEMALLOC_HAVE_BUILTIN_CLZ
/*
* Defined if os_unfair_lock_*() functions are available, as provided by Darwin.
*/
/* #undef JEMALLOC_OS_UNFAIR_LOCK */
/* Defined if syscall(2) is usable. */
#define JEMALLOC_USE_SYSCALL
/*
* Defined if secure_getenv(3) is available.
*/
/* #undef JEMALLOC_HAVE_SECURE_GETENV */
/*
* Defined if issetugid(2) is available.
*/
#define JEMALLOC_HAVE_ISSETUGID
/* Defined if pthread_atfork(3) is available. */
#define JEMALLOC_HAVE_PTHREAD_ATFORK
/* Defined if pthread_setname_np(3) is available. */
// Only since 12.1-STABLE
// #define JEMALLOC_HAVE_PTHREAD_SETNAME_NP
/*
* Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available.
*/
/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE */
/*
* Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available.
*/
#define JEMALLOC_HAVE_CLOCK_MONOTONIC 1
/*
* Defined if mach_absolute_time() is available.
*/
/* #undef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME */
/*
* Defined if clock_gettime(CLOCK_REALTIME, ...) is available.
*/
#define JEMALLOC_HAVE_CLOCK_REALTIME 1
/*
* Defined if _malloc_thread_cleanup() exists. At least in the case of
* FreeBSD, pthread_key_create() allocates, which if used during malloc
* bootstrapping will cause recursion into the pthreads library. Therefore, if
* _malloc_thread_cleanup() exists, use it as the basis for thread cleanup in
* malloc_tsd.
*/
#define JEMALLOC_MALLOC_THREAD_CLEANUP
/*
* Defined if threaded initialization is known to be safe on this platform.
* Among other things, it must be possible to initialize a mutex without
* triggering allocation in order for threaded allocation to be safe.
*/
/* #undef JEMALLOC_THREADED_INIT */
/*
* Defined if the pthreads implementation defines
* _pthread_mutex_init_calloc_cb(), in which case the function is used in order
* to avoid recursive allocation during mutex initialization.
*/
#define JEMALLOC_MUTEX_INIT_CB 1
/* Non-empty if the tls_model attribute is supported. */
#define JEMALLOC_TLS_MODEL __attribute__((tls_model("initial-exec")))
/*
* JEMALLOC_DEBUG enables assertions and other sanity checks, and disables
* inline functions.
*/
/* #undef JEMALLOC_DEBUG */
/* JEMALLOC_STATS enables statistics calculation. */
#define JEMALLOC_STATS
/* JEMALLOC_EXPERIMENTAL_SMALLOCX_API enables experimental smallocx API. */
/* #undef JEMALLOC_EXPERIMENTAL_SMALLOCX_API */
/* JEMALLOC_PROF enables allocation profiling. */
/* #undef JEMALLOC_PROF */
/* Use libunwind for profile backtracing if defined. */
/* #undef JEMALLOC_PROF_LIBUNWIND */
/* Use libgcc for profile backtracing if defined. */
/* #undef JEMALLOC_PROF_LIBGCC */
/* Use gcc intrinsics for profile backtracing if defined. */
/* #undef JEMALLOC_PROF_GCC */
/*
* JEMALLOC_DSS enables use of sbrk(2) to allocate extents from the data storage
* segment (DSS).
*/
#define JEMALLOC_DSS
/* Support memory filling (junk/zero). */
#define JEMALLOC_FILL
/* Support utrace(2)-based tracing. */
/* #undef JEMALLOC_UTRACE */
/* Support optional abort() on OOM. */
/* #undef JEMALLOC_XMALLOC */
/* Support lazy locking (avoid locking unless a second thread is launched). */
#define JEMALLOC_LAZY_LOCK
/*
* Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size
* classes).
*/
/* #undef LG_QUANTUM */
/* One page is 2^LG_PAGE bytes. */
#define LG_PAGE 12
/*
* One huge page is 2^LG_HUGEPAGE bytes. Note that this is defined even if the
* system does not explicitly support huge pages; system calls that require
* explicit huge page support are separately configured.
*/
#define LG_HUGEPAGE 21
/*
* If defined, adjacent virtual memory mappings with identical attributes
* automatically coalesce, and they fragment when changes are made to subranges.
* This is the normal order of things for mmap()/munmap(), but on Windows
* VirtualAlloc()/VirtualFree() operations must be precisely matched, i.e.
* mappings do *not* coalesce/fragment.
*/
#define JEMALLOC_MAPS_COALESCE
/*
* If defined, retain memory for later reuse by default rather than using e.g.
* munmap() to unmap freed extents. This is enabled on 64-bit Linux because
* common sequences of mmap()/munmap() calls will cause virtual memory map
* holes.
*/
/* #undef JEMALLOC_RETAIN */
/* TLS is used to map arenas and magazine caches to threads. */
#define JEMALLOC_TLS
/*
* Used to mark unreachable code to quiet "end of non-void" compiler warnings.
* Don't use this directly; instead use unreachable() from util.h
*/
#define JEMALLOC_INTERNAL_UNREACHABLE __builtin_unreachable
/*
* ffs*() functions to use for bitmapping. Don't use these directly; instead,
* use ffs_*() from util.h.
*/
#define JEMALLOC_INTERNAL_FFSLL __builtin_ffsll
#define JEMALLOC_INTERNAL_FFSL __builtin_ffsl
#define JEMALLOC_INTERNAL_FFS __builtin_ffs
/*
* popcount*() functions to use for bitmapping.
*/
#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl
#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount
/*
* If defined, explicitly attempt to more uniformly distribute large allocation
* pointer alignments across all cache indices.
*/
#define JEMALLOC_CACHE_OBLIVIOUS
/*
* If defined, enable logging facilities. We make this a configure option to
* avoid taking extra branches everywhere.
*/
/* #undef JEMALLOC_LOG */
/*
* If defined, use readlinkat() (instead of readlink()) to follow
* /etc/malloc_conf.
*/
/* #undef JEMALLOC_READLINKAT */
/*
* Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings.
*/
/* #undef JEMALLOC_ZONE */
/*
* Methods for determining whether the OS overcommits.
* JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY: Linux's
* /proc/sys/vm.overcommit_memory file.
* JEMALLOC_SYSCTL_VM_OVERCOMMIT: FreeBSD's vm.overcommit sysctl.
*/
#define JEMALLOC_SYSCTL_VM_OVERCOMMIT
/* #undef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY */
/* Defined if madvise(2) is available. */
#define JEMALLOC_HAVE_MADVISE
/*
* Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE
* arguments to madvise(2).
*/
/* #undef JEMALLOC_HAVE_MADVISE_HUGE */
/*
* Methods for purging unused pages differ between operating systems.
*
* madvise(..., MADV_FREE) : This marks pages as being unused, such that they
* will be discarded rather than swapped out.
* madvise(..., MADV_DONTNEED) : If JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS is
* defined, this immediately discards pages,
* such that new pages will be demand-zeroed if
* the address region is later touched;
* otherwise this behaves similarly to
* MADV_FREE, though typically with higher
* system overhead.
*/
#define JEMALLOC_PURGE_MADVISE_FREE
#define JEMALLOC_PURGE_MADVISE_DONTNEED
/* #undef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS */
/* Defined if madvise(2) is available but MADV_FREE is not (x86 Linux only). */
/* #undef JEMALLOC_DEFINE_MADVISE_FREE */
/*
* Defined if MADV_DO[NT]DUMP is supported as an argument to madvise.
*/
/* #undef JEMALLOC_MADVISE_DONTDUMP */
/*
* Defined if transparent huge pages (THPs) are supported via the
* MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled.
*/
/* #undef JEMALLOC_THP */
/* Define if operating system has alloca.h header. */
/* #undef JEMALLOC_HAS_ALLOCA_H */
/* C99 restrict keyword supported. */
#define JEMALLOC_HAS_RESTRICT 1
/* For use by hash code. */
/* #undef JEMALLOC_BIG_ENDIAN */
/* sizeof(int) == 2^LG_SIZEOF_INT. */
#define LG_SIZEOF_INT 2
/* sizeof(long) == 2^LG_SIZEOF_LONG. */
#define LG_SIZEOF_LONG 3
/* sizeof(long long) == 2^LG_SIZEOF_LONG_LONG. */
#define LG_SIZEOF_LONG_LONG 3
/* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */
#define LG_SIZEOF_INTMAX_T 3
/* glibc malloc hooks (__malloc_hook, __realloc_hook, __free_hook). */
/* #undef JEMALLOC_GLIBC_MALLOC_HOOK */
/* glibc memalign hook. */
/* #undef JEMALLOC_GLIBC_MEMALIGN_HOOK */
/* pthread support */
#define JEMALLOC_HAVE_PTHREAD
/* dlsym() support */
#define JEMALLOC_HAVE_DLSYM
/* Adaptive mutex support in pthreads. */
#define JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP
/* GNU specific sched_getcpu support */
/* #undef JEMALLOC_HAVE_SCHED_GETCPU */
/* GNU specific sched_setaffinity support */
/* #undef JEMALLOC_HAVE_SCHED_SETAFFINITY */
/*
* If defined, all the features necessary for background threads are present.
*/
#define JEMALLOC_BACKGROUND_THREAD 1
/*
* If defined, jemalloc symbols are not exported (doesn't work when
* JEMALLOC_PREFIX is not defined).
*/
/* #undef JEMALLOC_EXPORT */
/* config.malloc_conf options string. */
#define JEMALLOC_CONFIG_MALLOC_CONF "@JEMALLOC_CONFIG_MALLOC_CONF@"
/* If defined, jemalloc takes the malloc/free/etc. symbol names. */
#define JEMALLOC_IS_MALLOC 1
/*
* Defined if strerror_r returns char * if _GNU_SOURCE is defined.
*/
/* #undef JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE */
/* Performs additional safety checks when defined. */
/* #undef JEMALLOC_OPT_SAFETY_CHECKS */
#endif /* JEMALLOC_INTERNAL_DEFS_H_ */
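
The LG_* settings in this header are base-2 logarithms; the concrete sizes follow by shifting. A minimal standalone sketch of that arithmetic (not part of the vendored header; the macro values are simply restated from the configuration above):

``` c
#include <stdio.h>

/* Values restated from the configuration above. */
#define LG_PAGE 12
#define LG_HUGEPAGE 21
#define LG_SIZEOF_LONG 3

int main(void)
{
    size_t page      = (size_t)1 << LG_PAGE;        /* 1 << 12 = 4096 bytes (4 KiB)    */
    size_t hugepage  = (size_t)1 << LG_HUGEPAGE;    /* 1 << 21 = 2097152 bytes (2 MiB) */
    size_t long_size = (size_t)1 << LG_SIZEOF_LONG; /* 1 << 3  = 8 bytes               */

    printf("page=%zu hugepage=%zu sizeof(long)=%zu\n", page, hugepage, long_size);
    return 0;
}
```

With these values a regular page is 4 KiB, a huge page is 2 MiB, and `long` is 8 bytes, i.e. an LP64 target.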

View File

@ -35,7 +35,7 @@
*/ */
#define CPU_SPINWAIT #define CPU_SPINWAIT
/* 1 if CPU_SPINWAIT is defined, 0 otherwise. */ /* 1 if CPU_SPINWAIT is defined, 0 otherwise. */
#define HAVE_CPU_SPINWAIT 9 #define HAVE_CPU_SPINWAIT 0
/* /*
* Number of significant bits in virtual addresses. This may be less than the * Number of significant bits in virtual addresses. This may be less than the
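
For context on the hunk above: HAVE_CPU_SPINWAIT tells the rest of the allocator whether CPU_SPINWAIT expands to a real instruction, and on this target it is now 0 with an empty CPU_SPINWAIT. A minimal sketch of the usual consumer pattern — not jemalloc's actual spin loop; the function name and the sched_yield() fallback are illustrative assumptions:

``` c
#include <sched.h>
#include <stdatomic.h>

/* Restated from the hunk above: on this target the spin hint is empty. */
#define CPU_SPINWAIT
#define HAVE_CPU_SPINWAIT 0

/* Hypothetical consumer; jemalloc's real spin loops live elsewhere. */
static inline void
spin_until_unlocked(atomic_bool *locked)
{
    while (atomic_load_explicit(locked, memory_order_acquire)) {
#if HAVE_CPU_SPINWAIT
        CPU_SPINWAIT;   /* e.g. the x86 "pause" hint */
#else
        sched_yield();  /* no cheap spin hint available, so yield the CPU */
#endif
    }
}
```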

View File

@ -1,213 +0,0 @@
#ifndef JEMALLOC_PREAMBLE_H
#define JEMALLOC_PREAMBLE_H
#include "jemalloc_internal_defs.h"
#include "jemalloc/internal/jemalloc_internal_decls.h"
#ifdef JEMALLOC_UTRACE
#include <sys/ktrace.h>
#endif
#define JEMALLOC_NO_DEMANGLE
#ifdef JEMALLOC_JET
# undef JEMALLOC_IS_MALLOC
# define JEMALLOC_N(n) jet_##n
# include "jemalloc/internal/public_namespace.h"
# define JEMALLOC_NO_RENAME
# include "jemalloc/jemalloc.h"
# undef JEMALLOC_NO_RENAME
#else
# define JEMALLOC_N(n) je_##n
# include "jemalloc/jemalloc.h"
#endif
#if defined(JEMALLOC_OSATOMIC)
#include <libkern/OSAtomic.h>
#endif
#ifdef JEMALLOC_ZONE
#include <mach/mach_error.h>
#include <mach/mach_init.h>
#include <mach/vm_map.h>
#endif
#include "jemalloc/internal/jemalloc_internal_macros.h"
/*
* Note that the ordering matters here; the hook itself is name-mangled. We
* want the inclusion of hooks to happen early, so that we hook as much as
* possible.
*/
#ifndef JEMALLOC_NO_PRIVATE_NAMESPACE
# ifndef JEMALLOC_JET
# include "jemalloc/internal/private_namespace.h"
# else
# include "jemalloc/internal/private_namespace_jet.h"
# endif
#endif
#include "jemalloc/internal/test_hooks.h"
#ifdef JEMALLOC_DEFINE_MADVISE_FREE
# define JEMALLOC_MADV_FREE 8
#endif
static const bool config_debug =
#ifdef JEMALLOC_DEBUG
true
#else
false
#endif
;
static const bool have_dss =
#ifdef JEMALLOC_DSS
true
#else
false
#endif
;
static const bool have_madvise_huge =
#ifdef JEMALLOC_HAVE_MADVISE_HUGE
true
#else
false
#endif
;
static const bool config_fill =
#ifdef JEMALLOC_FILL
true
#else
false
#endif
;
static const bool config_lazy_lock =
#ifdef JEMALLOC_LAZY_LOCK
true
#else
false
#endif
;
static const char * const config_malloc_conf = JEMALLOC_CONFIG_MALLOC_CONF;
static const bool config_prof =
#ifdef JEMALLOC_PROF
true
#else
false
#endif
;
static const bool config_prof_libgcc =
#ifdef JEMALLOC_PROF_LIBGCC
true
#else
false
#endif
;
static const bool config_prof_libunwind =
#ifdef JEMALLOC_PROF_LIBUNWIND
true
#else
false
#endif
;
static const bool maps_coalesce =
#ifdef JEMALLOC_MAPS_COALESCE
true
#else
false
#endif
;
static const bool config_stats =
#ifdef JEMALLOC_STATS
true
#else
false
#endif
;
static const bool config_tls =
#ifdef JEMALLOC_TLS
true
#else
false
#endif
;
static const bool config_utrace =
#ifdef JEMALLOC_UTRACE
true
#else
false
#endif
;
static const bool config_xmalloc =
#ifdef JEMALLOC_XMALLOC
true
#else
false
#endif
;
static const bool config_cache_oblivious =
#ifdef JEMALLOC_CACHE_OBLIVIOUS
true
#else
false
#endif
;
/*
* Undocumented, for jemalloc development use only at the moment. See the note
* in jemalloc/internal/log.h.
*/
static const bool config_log =
#ifdef JEMALLOC_LOG
true
#else
false
#endif
;
/*
* Are extra safety checks enabled; things like checking the size of sized
* deallocations, double-frees, etc.
*/
static const bool config_opt_safety_checks =
#ifdef JEMALLOC_OPT_SAFETY_CHECKS
true
#elif defined(JEMALLOC_DEBUG)
/*
* This lets us only guard safety checks by one flag instead of two; fast
* checks can guard solely by config_opt_safety_checks and run in debug mode
* too.
*/
true
#else
false
#endif
;
#if defined(_WIN32) || defined(JEMALLOC_HAVE_SCHED_GETCPU)
/* Currently percpu_arena depends on sched_getcpu. */
#define JEMALLOC_PERCPU_ARENA
#endif
static const bool have_percpu_arena =
#ifdef JEMALLOC_PERCPU_ARENA
true
#else
false
#endif
;
/*
* Undocumented, and not recommended; the application should take full
* responsibility for tracking provenance.
*/
static const bool force_ivsalloc =
#ifdef JEMALLOC_FORCE_IVSALLOC
true
#else
false
#endif
;
static const bool have_background_thread =
#ifdef JEMALLOC_BACKGROUND_THREAD
true
#else
false
#endif
;
#endif /* JEMALLOC_PREAMBLE_H */
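
The point of the `static const bool config_*` definitions in this (now removed) preamble is to fold each JEMALLOC_* configure macro into a compile-time boolean exactly once, so feature checks elsewhere can be ordinary `if` statements instead of `#ifdef` blocks: both branches stay parsed and type-checked in every build, and the compiler drops the dead one. A minimal sketch of that pattern, assuming a hypothetical `on_alloc()` consumer (not jemalloc source):

``` c
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* The #ifdef is folded into a boolean once, here for JEMALLOC_STATS. */
static const bool config_stats =
#ifdef JEMALLOC_STATS
    true
#else
    false
#endif
    ;

/* Hypothetical consumer: the branch is always parsed and type-checked,
 * and the compiler removes it entirely when config_stats is false. */
static void on_alloc(size_t size)
{
    if (config_stats)
        printf("allocated %zu bytes\n", size);
}

int main(void)
{
    on_alloc(64);
    return 0;
}
```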

debian/changelog
View File

@ -1,5 +1,5 @@
clickhouse (20.5.1.1) unstable; urgency=low clickhouse (20.6.1.1) unstable; urgency=low
* Modified source code * Modified source code
-- clickhouse-release <clickhouse-release@yandex-team.ru> Tue, 28 Apr 2020 20:12:13 +0300 -- clickhouse-release <clickhouse-release@yandex-team.ru> Mon, 22 Jun 2020 20:40:23 +0300

debian/control
View File

@ -28,7 +28,7 @@ Description: Client binary for ClickHouse
Package: clickhouse-common-static Package: clickhouse-common-static
Architecture: any Architecture: any
Depends: ${shlibs:Depends}, ${misc:Depends}, tzdata Depends: ${shlibs:Depends}, ${misc:Depends}
Suggests: clickhouse-common-static-dbg Suggests: clickhouse-common-static-dbg
Replaces: clickhouse-common, clickhouse-server-base Replaces: clickhouse-common, clickhouse-server-base
Provides: clickhouse-common, clickhouse-server-base Provides: clickhouse-common, clickhouse-server-base

View File

@ -1,7 +1,7 @@
FROM ubuntu:18.04 FROM ubuntu:18.04
ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=20.5.1.* ARG version=20.6.1.*
RUN apt-get update \ RUN apt-get update \
&& apt-get install --yes --no-install-recommends \ && apt-get install --yes --no-install-recommends \
@ -17,7 +17,6 @@ RUN apt-get update \
clickhouse-client=$version \ clickhouse-client=$version \
clickhouse-common-static=$version \ clickhouse-common-static=$version \
locales \ locales \
tzdata \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf \ && rm -rf /var/lib/apt/lists/* /var/cache/debconf \
&& apt-get clean && apt-get clean

View File

@ -6,7 +6,6 @@
"docker/test/compatibility/ubuntu": "yandex/clickhouse-test-old-ubuntu", "docker/test/compatibility/ubuntu": "yandex/clickhouse-test-old-ubuntu",
"docker/test/integration/base": "yandex/clickhouse-integration-test", "docker/test/integration/base": "yandex/clickhouse-integration-test",
"docker/test/performance-comparison": "yandex/clickhouse-performance-comparison", "docker/test/performance-comparison": "yandex/clickhouse-performance-comparison",
"docker/test/pvs": "yandex/clickhouse-pvs-test",
"docker/test/stateful": "yandex/clickhouse-stateful-test", "docker/test/stateful": "yandex/clickhouse-stateful-test",
"docker/test/stateful_with_coverage": "yandex/clickhouse-stateful-test-with-coverage", "docker/test/stateful_with_coverage": "yandex/clickhouse-stateful-test-with-coverage",
"docker/test/stateless": "yandex/clickhouse-stateless-test", "docker/test/stateless": "yandex/clickhouse-stateless-test",

View File

@ -18,7 +18,7 @@ ccache --zero-stats ||:
ln -s /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/libOpenCL.so ||: ln -s /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/libOpenCL.so ||:
rm -f CMakeCache.txt rm -f CMakeCache.txt
cmake .. -LA -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DSANITIZE=$SANITIZER $CMAKE_FLAGS cmake .. -LA -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DSANITIZE=$SANITIZER $CMAKE_FLAGS
ninja clickhouse-bundle ninja -v clickhouse-bundle
mv ./programs/clickhouse* /output mv ./programs/clickhouse* /output
mv ./src/unit_tests_dbms /output mv ./src/unit_tests_dbms /output
find . -name '*.so' -print -exec mv '{}' /output \; find . -name '*.so' -print -exec mv '{}' /output \;

View File

@ -54,6 +54,8 @@ RUN apt-get --allow-unauthenticated update -y \
libboost-system-dev \ libboost-system-dev \
libboost-filesystem-dev \ libboost-filesystem-dev \
libboost-thread-dev \ libboost-thread-dev \
libboost-iostreams-dev \
libboost-regex-dev \
zlib1g-dev \ zlib1g-dev \
liblz4-dev \ liblz4-dev \
libdouble-conversion-dev \ libdouble-conversion-dev \

View File

@ -142,7 +142,7 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ
if unbundled: if unbundled:
# TODO: fix build with ENABLE_RDKAFKA # TODO: fix build with ENABLE_RDKAFKA
cmake_flags.append('-DUNBUNDLED=1 -DENABLE_MYSQL=0 -DENABLE_ODBC=0 -DENABLE_REPLXX=0 -DENABLE_RDKAFKA=0 -DUSE_INTERNAL_BOOST_LIBRARY=1') cmake_flags.append('-DUNBUNDLED=1 -DENABLE_MYSQL=0 -DENABLE_ODBC=0 -DENABLE_REPLXX=0 -DENABLE_RDKAFKA=0')
if split_binary: if split_binary:
cmake_flags.append('-DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 -DCLICKHOUSE_SPLIT_BINARY=1') cmake_flags.append('-DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 -DCLICKHOUSE_SPLIT_BINARY=1')

View File

@ -1,7 +1,7 @@
FROM ubuntu:18.04 FROM ubuntu:18.04
ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=20.5.1.* ARG version=20.6.1.*
ARG gosu_ver=1.10 ARG gosu_ver=1.10
RUN apt-get update \ RUN apt-get update \
@ -21,7 +21,6 @@ RUN apt-get update \
locales \ locales \
ca-certificates \ ca-certificates \
wget \ wget \
tzdata \
&& rm -rf \ && rm -rf \
/var/lib/apt/lists/* \ /var/lib/apt/lists/* \
/var/cache/debconf \ /var/cache/debconf \

View File

@ -110,7 +110,7 @@ if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then
# create default database, if defined # create default database, if defined
if [ -n "$CLICKHOUSE_DB" ]; then if [ -n "$CLICKHOUSE_DB" ]; then
echo "$0: create database '$CLICKHOUSE_DB'" echo "$0: create database '$CLICKHOUSE_DB'"
"${clickhouseclient[@]}" "CREATE DATABASE IF NOT EXISTS $CLICKHOUSE_DB"; "${clickhouseclient[@]}" -q "CREATE DATABASE IF NOT EXISTS $CLICKHOUSE_DB";
fi fi
for f in /docker-entrypoint-initdb.d/*; do for f in /docker-entrypoint-initdb.d/*; do

View File

@ -1,7 +1,7 @@
FROM ubuntu:18.04 FROM ubuntu:18.04
ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=20.5.1.* ARG version=20.6.1.*
RUN apt-get update && \ RUN apt-get update && \
apt-get install -y apt-transport-https dirmngr && \ apt-get install -y apt-transport-https dirmngr && \

View File

@ -17,8 +17,7 @@ RUN apt-get update \
odbc-postgresql \ odbc-postgresql \
sqlite3 \ sqlite3 \
curl \ curl \
tar \ tar
tzdata
RUN rm -rf \ RUN rm -rf \
/var/lib/apt/lists/* \ /var/lib/apt/lists/* \
/var/cache/debconf \ /var/cache/debconf \

View File

@ -7,3 +7,7 @@ services:
POSTGRES_PASSWORD: mysecretpassword POSTGRES_PASSWORD: mysecretpassword
ports: ports:
- 5432:5432 - 5432:5432
networks:
default:
aliases:
- postgre-sql.local

View File

@ -22,7 +22,7 @@ function configure
echo all killed echo all killed
set -m # Spawn temporary in its own process groups set -m # Spawn temporary in its own process groups
left/clickhouse-server --config-file=left/config/config.xml -- --path db0 &> setup-server-log.log & left/clickhouse-server --config-file=left/config/config.xml -- --path db0 --user_files_path db0/user_files &> setup-server-log.log &
left_pid=$! left_pid=$!
kill -0 $left_pid kill -0 $left_pid
disown $left_pid disown $left_pid
@ -59,12 +59,12 @@ function restart
set -m # Spawn servers in their own process groups set -m # Spawn servers in their own process groups
left/clickhouse-server --config-file=left/config/config.xml -- --path left/db &>> left-server-log.log & left/clickhouse-server --config-file=left/config/config.xml -- --path left/db --user_files_path left/db/user_files &>> left-server-log.log &
left_pid=$! left_pid=$!
kill -0 $left_pid kill -0 $left_pid
disown $left_pid disown $left_pid
right/clickhouse-server --config-file=right/config/config.xml -- --path right/db &>> right-server-log.log & right/clickhouse-server --config-file=right/config/config.xml -- --path right/db --user_files_path right/db/user_files &>> right-server-log.log &
right_pid=$! right_pid=$!
kill -0 $right_pid kill -0 $right_pid
disown $right_pid disown $right_pid
@ -198,12 +198,14 @@ function get_profiles
clickhouse-client --port 9001 --query "select * from system.trace_log format TSVWithNamesAndTypes" > left-trace-log.tsv ||: & clickhouse-client --port 9001 --query "select * from system.trace_log format TSVWithNamesAndTypes" > left-trace-log.tsv ||: &
clickhouse-client --port 9001 --query "select arrayJoin(trace) addr, concat(splitByChar('/', addressToLine(addr))[-1], '#', demangle(addressToSymbol(addr)) ) name from system.trace_log group by addr format TSVWithNamesAndTypes" > left-addresses.tsv ||: & clickhouse-client --port 9001 --query "select arrayJoin(trace) addr, concat(splitByChar('/', addressToLine(addr))[-1], '#', demangle(addressToSymbol(addr)) ) name from system.trace_log group by addr format TSVWithNamesAndTypes" > left-addresses.tsv ||: &
clickhouse-client --port 9001 --query "select * from system.metric_log format TSVWithNamesAndTypes" > left-metric-log.tsv ||: & clickhouse-client --port 9001 --query "select * from system.metric_log format TSVWithNamesAndTypes" > left-metric-log.tsv ||: &
clickhouse-client --port 9001 --query "select * from system.asynchronous_metric_log format TSVWithNamesAndTypes" > left-async-metric-log.tsv ||: &
clickhouse-client --port 9002 --query "select * from system.query_log where type = 2 format TSVWithNamesAndTypes" > right-query-log.tsv ||: & clickhouse-client --port 9002 --query "select * from system.query_log where type = 2 format TSVWithNamesAndTypes" > right-query-log.tsv ||: &
clickhouse-client --port 9002 --query "select * from system.query_thread_log format TSVWithNamesAndTypes" > right-query-thread-log.tsv ||: & clickhouse-client --port 9002 --query "select * from system.query_thread_log format TSVWithNamesAndTypes" > right-query-thread-log.tsv ||: &
clickhouse-client --port 9002 --query "select * from system.trace_log format TSVWithNamesAndTypes" > right-trace-log.tsv ||: & clickhouse-client --port 9002 --query "select * from system.trace_log format TSVWithNamesAndTypes" > right-trace-log.tsv ||: &
clickhouse-client --port 9002 --query "select arrayJoin(trace) addr, concat(splitByChar('/', addressToLine(addr))[-1], '#', demangle(addressToSymbol(addr)) ) name from system.trace_log group by addr format TSVWithNamesAndTypes" > right-addresses.tsv ||: & clickhouse-client --port 9002 --query "select arrayJoin(trace) addr, concat(splitByChar('/', addressToLine(addr))[-1], '#', demangle(addressToSymbol(addr)) ) name from system.trace_log group by addr format TSVWithNamesAndTypes" > right-addresses.tsv ||: &
clickhouse-client --port 9002 --query "select * from system.metric_log format TSVWithNamesAndTypes" > right-metric-log.tsv ||: & clickhouse-client --port 9002 --query "select * from system.metric_log format TSVWithNamesAndTypes" > right-metric-log.tsv ||: &
clickhouse-client --port 9002 --query "select * from system.asynchronous_metric_log format TSVWithNamesAndTypes" > right-async-metric-log.tsv ||: &
wait wait

View File

@ -17,7 +17,7 @@ function find_reference_sha
# If not master, try to fetch pull/.../{head,merge} # If not master, try to fetch pull/.../{head,merge}
if [ "$PR_TO_TEST" != "0" ] if [ "$PR_TO_TEST" != "0" ]
then then
git -C ch fetch origin "refs/pull/$PR_TO_TEST/*:refs/heads/pr/*" git -C ch fetch origin "refs/pull/$PR_TO_TEST/*:refs/heads/pull/$PR_TO_TEST/*"
fi fi
# Go back from the revision to be tested, trying to find the closest published # Go back from the revision to be tested, trying to find the closest published
@ -28,9 +28,9 @@ function find_reference_sha
# and SHA_TO_TEST, but a revision that is merged with recent master, given # and SHA_TO_TEST, but a revision that is merged with recent master, given
# by pull/.../merge ref. # by pull/.../merge ref.
# Master is the first parent of the pull/.../merge. # Master is the first parent of the pull/.../merge.
if git -C ch rev-parse pr/merge if git -C ch rev-parse "pull/$PR_TO_TEST/merge"
then then
start_ref=pr/merge~ start_ref="pull/$PR_TO_TEST/merge~"
fi fi
while : while :
@ -73,11 +73,11 @@ if [ "$REF_PR" == "" ]; then echo Reference PR is not specified ; exit 1 ; fi
( (
git -C ch log -1 --decorate "$SHA_TO_TEST" ||: git -C ch log -1 --decorate "$SHA_TO_TEST" ||:
if git -C ch rev-parse pr/merge &> /dev/null if git -C ch rev-parse "pull/$PR_TO_TEST/merge" &> /dev/null
then then
echo echo
echo Real tested commit is: echo Real tested commit is:
git -C ch log -1 --decorate pr/merge git -C ch log -1 --decorate "pull/$PR_TO_TEST/merge"
fi fi
) | tee right-commit.txt ) | tee right-commit.txt
@ -87,7 +87,7 @@ then
# tests for use by compare.sh. Compare to merge base, because master might be # tests for use by compare.sh. Compare to merge base, because master might be
# far in the future and have unrelated test changes. # far in the future and have unrelated test changes.
base=$(git -C ch merge-base "$SHA_TO_TEST" master) base=$(git -C ch merge-base "$SHA_TO_TEST" master)
git -C ch diff --name-only "$SHA_TO_TEST" "$base" | tee changed-tests.txt git -C ch diff --name-only "$base" "$SHA_TO_TEST" | tee changed-tests.txt
if grep -vq '^tests/performance' changed-tests.txt if grep -vq '^tests/performance' changed-tests.txt
then then
# Have some other changes besides the tests, so truncate the test list, # Have some other changes besides the tests, so truncate the test list,
@ -131,5 +131,8 @@ done
dmesg -T > dmesg.log dmesg -T > dmesg.log
7z a '-x!*/tmp' /output/output.7z ./*.{log,tsv,html,txt,rep,svg,columns} {right,left}/{performance,db/preprocessed_configs,scripts} report analyze 7z a '-x!*/tmp' /output/output.7z ./*.{log,tsv,html,txt,rep,svg,columns} \
{right,left}/{performance,scripts} {{right,left}/db,db0}/preprocessed_configs \
report analyze benchmark
cp compare.log /output cp compare.log /output

View File

@ -20,9 +20,9 @@ RUN apt-get --allow-unauthenticated update -y \
# apt-get --allow-unauthenticated install --yes --no-install-recommends \ # apt-get --allow-unauthenticated install --yes --no-install-recommends \
# pvs-studio # pvs-studio
ENV PKG_VERSION="pvs-studio-7.07.38234.46-amd64.deb" ENV PKG_VERSION="pvs-studio-7.08.39365.50-amd64.deb"
RUN wget "http://files.viva64.com/$PKG_VERSION" RUN wget "https://files.viva64.com/$PKG_VERSION"
RUN sudo dpkg -i "$PKG_VERSION" RUN sudo dpkg -i "$PKG_VERSION"
CMD cd /repo_folder && pvs-studio-analyzer credentials $LICENCE_NAME $LICENCE_KEY -o ./licence.lic \ CMD cd /repo_folder && pvs-studio-analyzer credentials $LICENCE_NAME $LICENCE_KEY -o ./licence.lic \

View File

@ -1,4 +1,4 @@
## function-name {#function-name-in-lower-case} ## functionName {#functionname-in-lower-case}
Short description. Short description.

View File

@ -1,4 +1,4 @@
## setting-name {#setting-name-in-lower-case} ## setting_name {#setting_name}
Description. Description.

View File

@ -7,7 +7,7 @@ toc_title: How to Build ClickHouse on Linux for AARCH64 (ARM64)
This is for the case when you have Linux machine and want to use it to build `clickhouse` binary that will run on another Linux machine with AARCH64 CPU architecture. This is intended for continuous integration checks that run on Linux servers. This is for the case when you have Linux machine and want to use it to build `clickhouse` binary that will run on another Linux machine with AARCH64 CPU architecture. This is intended for continuous integration checks that run on Linux servers.
The cross-build for AARCH64 is based on the [Build instructions](build.md), follow them first. The cross-build for AARCH64 is based on the [Build instructions](../development/build.md), follow them first.
# Install Clang-8 {#install-clang-8} # Install Clang-8 {#install-clang-8}

View File

@ -5,9 +5,9 @@ toc_title: How to Build ClickHouse on Linux for Mac OS X
# How to Build ClickHouse on Linux for Mac OS X {#how-to-build-clickhouse-on-linux-for-mac-os-x} # How to Build ClickHouse on Linux for Mac OS X {#how-to-build-clickhouse-on-linux-for-mac-os-x}
This is for the case when you have Linux machine and want to use it to build `clickhouse` binary that will run on OS X. This is intended for continuous integration checks that run on Linux servers. If you want to build ClickHouse directly on Mac OS X, then proceed with [another instruction](build-osx.md). This is for the case when you have Linux machine and want to use it to build `clickhouse` binary that will run on OS X. This is intended for continuous integration checks that run on Linux servers. If you want to build ClickHouse directly on Mac OS X, then proceed with [another instruction](../development/build-osx.md).
The cross-build for Mac OS X is based on the [Build instructions](build.md), follow them first. The cross-build for Mac OS X is based on the [Build instructions](../development/build.md), follow them first.
# Install Clang-8 {#install-clang-8} # Install Clang-8 {#install-clang-8}

View File

@ -28,10 +28,9 @@ There are several ways to do this.
### Install from Repository {#install-from-repository} ### Install from Repository {#install-from-repository}
On Ubuntu 19.10 or newer: On Ubuntu 19.10 or newer:
```
$ sudo apt-get update $ sudo apt-get update
$ sudo apt-get install gcc-9 g++-9 $ sudo apt-get install gcc-9 g++-9
```
### Install from a PPA Package {#install-from-a-ppa-package} ### Install from a PPA Package {#install-from-a-ppa-package}

View File

@ -3,11 +3,11 @@ toc_priority: 61
toc_title: For Beginners toc_title: For Beginners
--- ---
# The Beginner ClickHouse Developer Instruction # The Beginner ClickHouse Developer Instruction {#the-beginner-clickhouse-developer-instruction}
Building of ClickHouse is supported on Linux, FreeBSD and Mac OS X. Building of ClickHouse is supported on Linux, FreeBSD and Mac OS X.
If you use Windows, you need to create a virtual machine with Ubuntu. To start working with a virtual machine please install VirtualBox. You can download Ubuntu from the website: https://www.ubuntu.com/#download. Please create a virtual machine from the downloaded image (you should reserve at least 4GB of RAM for it). To run a command-line terminal in Ubuntu, please locate a program containing the word “terminal” in its name (gnome-terminal, konsole etc.) or just press Ctrl+Alt+T. If you use Windows, you need to create a virtual machine with Ubuntu. To start working with a virtual machine please install VirtualBox. You can download Ubuntu from the website: https://www.ubuntu.com/\#download. Please create a virtual machine from the downloaded image (you should reserve at least 4GB of RAM for it). To run a command-line terminal in Ubuntu, please locate a program containing the word “terminal” in its name (gnome-terminal, konsole etc.) or just press Ctrl+Alt+T.
ClickHouse cannot work or build on a 32-bit system. You should acquire access to a 64-bit system and you can continue reading. ClickHouse cannot work or build on a 32-bit system. You should acquire access to a 64-bit system and you can continue reading.
@ -137,7 +137,7 @@ Official Yandex builds currently use GCC because it generates machine code of sl
To install GCC on Ubuntu run: `sudo apt install gcc g++` To install GCC on Ubuntu run: `sudo apt install gcc g++`
Check the version of gcc: `gcc --version`. If it is below 9, then follow the instruction here: https://clickhouse.tech/docs/en/development/build/#install-gcc-9. Check the version of gcc: `gcc --version`. If it is below 9, then follow the instruction here: https://clickhouse.tech/docs/en/development/build/\#install-gcc-9.
Mac OS X build is supported only for Clang. Just run `brew install llvm` Mac OS X build is supported only for Clang. Just run `brew install llvm`

View File

@ -200,7 +200,7 @@ Debug version of `jemalloc` is used for debug build.
ClickHouse fuzzing is implemented both using [libFuzzer](https://llvm.org/docs/LibFuzzer.html) and random SQL queries. ClickHouse fuzzing is implemented both using [libFuzzer](https://llvm.org/docs/LibFuzzer.html) and random SQL queries.
All the fuzz testing should be performed with sanitizers (Address and Undefined). All the fuzz testing should be performed with sanitizers (Address and Undefined).
LibFuzzer is used for isolated fuzz testing of library code. Fuzzers are implemented as part of test code and have "\_fuzzer" name postfixes. LibFuzzer is used for isolated fuzz testing of library code. Fuzzers are implemented as part of test code and have “\_fuzzer” name postfixes.
Fuzzer example can be found at `src/Parsers/tests/lexer_fuzzer.cpp`. LibFuzzer-specific configs, dictionaries and corpus are stored at `tests/fuzz`. Fuzzer example can be found at `src/Parsers/tests/lexer_fuzzer.cpp`. LibFuzzer-specific configs, dictionaries and corpus are stored at `tests/fuzz`.
We encourage you to write fuzz tests for every functionality that handles user input. We encourage you to write fuzz tests for every functionality that handles user input.
@ -211,7 +211,6 @@ Google OSS-Fuzz can be found at `docker/fuzz`.
We also use simple fuzz test to generate random SQL queries and to check that the server doesnt die executing them. We also use simple fuzz test to generate random SQL queries and to check that the server doesnt die executing them.
You can find it in `00746_sql_fuzzy.pl`. This test should be run continuously (overnight and longer). You can find it in `00746_sql_fuzzy.pl`. This test should be run continuously (overnight and longer).
## Security Audit {#security-audit} ## Security Audit {#security-audit}
People from Yandex Security Team do some basic overview of ClickHouse capabilities from the security standpoint. People from Yandex Security Team do some basic overview of ClickHouse capabilities from the security standpoint.

View File

@ -12,8 +12,8 @@ By default, ClickHouse uses its native database engine, which provides configura
You can also use the following database engines: You can also use the following database engines:
- [MySQL](mysql.md) - [MySQL](../../engines/database-engines/mysql.md)
- [Lazy](lazy.md) - [Lazy](../../engines/database-engines/lazy.md)
[Original article](https://clickhouse.tech/docs/en/database_engines/) <!--hide--> [Original article](https://clickhouse.tech/docs/en/database_engines/) <!--hide-->

View File

@ -1,8 +1,15 @@
--- ---
toc_folder_title: Engines toc_folder_title: Engines
toc_hidden: true
toc_priority: 25 toc_priority: 25
toc_title: hidden toc_title: hidden
toc_hidden: true
--- ---
# ClickHouse Engines
There are two key engine kinds in ClickHouse:
- [Table engines](table-engines/index.md)
- [Database engines](database-engines/index.md)
{## [Original article](https://clickhouse.tech/docs/en/engines/) ##} {## [Original article](https://clickhouse.tech/docs/en/engines/) ##}

View File

@ -19,27 +19,27 @@ The table engine (type of table) determines:
### MergeTree {#mergetree} ### MergeTree {#mergetree}
The most universal and functional table engines for high-load tasks. The property shared by these engines is quick data insertion with subsequent background data processing. `MergeTree` family engines support data replication (with [Replicated*](mergetree-family/replication.md#table_engines-replication) versions of engines), partitioning, and other features not supported in other engines. The most universal and functional table engines for high-load tasks. The property shared by these engines is quick data insertion with subsequent background data processing. `MergeTree` family engines support data replication (with [Replicated\*](../../engines/table-engines/mergetree-family/replication.md#table_engines-replication) versions of engines), partitioning, secondary data-skipping indexes, and other features not supported in other engines.
Engines in the family: Engines in the family:
- [MergeTree](mergetree-family/mergetree.md#mergetree) - [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#mergetree)
- [ReplacingMergeTree](mergetree-family/replacingmergetree.md#replacingmergetree) - [ReplacingMergeTree](../../engines/table-engines/mergetree-family/replacingmergetree.md#replacingmergetree)
- [SummingMergeTree](mergetree-family/summingmergetree.md#summingmergetree) - [SummingMergeTree](../../engines/table-engines/mergetree-family/summingmergetree.md#summingmergetree)
- [AggregatingMergeTree](mergetree-family/aggregatingmergetree.md#aggregatingmergetree) - [AggregatingMergeTree](../../engines/table-engines/mergetree-family/aggregatingmergetree.md#aggregatingmergetree)
- [CollapsingMergeTree](mergetree-family/collapsingmergetree.md#table_engine-collapsingmergetree) - [CollapsingMergeTree](../../engines/table-engines/mergetree-family/collapsingmergetree.md#table_engine-collapsingmergetree)
- [VersionedCollapsingMergeTree](mergetree-family/versionedcollapsingmergetree.md#versionedcollapsingmergetree) - [VersionedCollapsingMergeTree](../../engines/table-engines/mergetree-family/versionedcollapsingmergetree.md#versionedcollapsingmergetree)
- [GraphiteMergeTree](mergetree-family/graphitemergetree.md#graphitemergetree) - [GraphiteMergeTree](../../engines/table-engines/mergetree-family/graphitemergetree.md#graphitemergetree)
### Log {#log} ### Log {#log}
Lightweight [engines](log-family/index.md) with minimum functionality. Theyre the most effective when you need to quickly write many small tables (up to approximately 1 million rows) and read them later as a whole. Lightweight [engines](../../engines/table-engines/log-family/index.md) with minimum functionality. Theyre the most effective when you need to quickly write many small tables (up to approximately 1 million rows) and read them later as a whole.
Engines in the family: Engines in the family:
- [TinyLog](log-family/tinylog.md#tinylog) - [TinyLog](../../engines/table-engines/log-family/tinylog.md#tinylog)
- [StripeLog](log-family/stripelog.md#stripelog) - [StripeLog](../../engines/table-engines/log-family/stripelog.md#stripelog)
- [Log](log-family/log.md#log) - [Log](../../engines/table-engines/log-family/log.md#log)
### Integration Engines {#integration-engines} ### Integration Engines {#integration-engines}
@ -47,28 +47,28 @@ Engines for communicating with other data storage and processing systems.
Engines in the family: Engines in the family:
- [Kafka](integrations/kafka.md#kafka) - [Kafka](../../engines/table-engines/integrations/kafka.md#kafka)
- [MySQL](integrations/mysql.md#mysql) - [MySQL](../../engines/table-engines/integrations/mysql.md#mysql)
- [ODBC](integrations/odbc.md#table-engine-odbc) - [ODBC](../../engines/table-engines/integrations/odbc.md#table-engine-odbc)
- [JDBC](integrations/jdbc.md#table-engine-jdbc) - [JDBC](../../engines/table-engines/integrations/jdbc.md#table-engine-jdbc)
- [HDFS](integrations/hdfs.md#hdfs) - [HDFS](../../engines/table-engines/integrations/hdfs.md#hdfs)
### Special Engines {#special-engines} ### Special Engines {#special-engines}
Engines in the family: Engines in the family:
- [Distributed](special/distributed.md#distributed) - [Distributed](../../engines/table-engines/special/distributed.md#distributed)
- [MaterializedView](special/materializedview.md#materializedview) - [MaterializedView](../../engines/table-engines/special/materializedview.md#materializedview)
- [Dictionary](special/dictionary.md#dictionary) - [Dictionary](../../engines/table-engines/special/dictionary.md#dictionary)
- [Merge](special/merge.md#merge) - [Merge](../../engines/table-engines/special/merge.md#merge)
- [File](special/file.md#file) - [File](../../engines/table-engines/special/file.md#file)
- [Null](special/null.md#null) - [Null](../../engines/table-engines/special/null.md#null)
- [Set](special/set.md#set) - [Set](../../engines/table-engines/special/set.md#set)
- [Join](special/join.md#join) - [Join](../../engines/table-engines/special/join.md#join)
- [URL](special/url.md#table_engines-url) - [URL](../../engines/table-engines/special/url.md#table_engines-url)
- [View](special/view.md#table_engines-view) - [View](../../engines/table-engines/special/view.md#table_engines-view)
- [Memory](special/memory.md#memory) - [Memory](../../engines/table-engines/special/memory.md#memory)
- [Buffer](special/buffer.md#buffer) - [Buffer](../../engines/table-engines/special/buffer.md#buffer)
## Virtual Columns {#table_engines-virtual_columns} ## Virtual Columns {#table_engines-virtual_columns}
@ -80,4 +80,4 @@ To select data from a virtual column, you must specify its name in the `SELECT`
If you create a table with a column that has the same name as one of the table virtual columns, the virtual column becomes inaccessible. We dont recommend doing this. To help avoid conflicts, virtual column names are usually prefixed with an underscore. If you create a table with a column that has the same name as one of the table virtual columns, the virtual column becomes inaccessible. We dont recommend doing this. To help avoid conflicts, virtual column names are usually prefixed with an underscore.
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/) <!--hide--> [Original article](https://clickhouse.tech/docs/en/engines/table-engines/) <!--hide-->

View File

@ -6,7 +6,7 @@ toc_title: HDFS
# HDFS {#table_engines-hdfs} # HDFS {#table_engines-hdfs}
This engine provides integration with [Apache Hadoop](https://en.wikipedia.org/wiki/Apache_Hadoop) ecosystem by allowing to manage data on [HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html)via ClickHouse. This engine is similar This engine provides integration with [Apache Hadoop](https://en.wikipedia.org/wiki/Apache_Hadoop) ecosystem by allowing to manage data on [HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html)via ClickHouse. This engine is similar
to the [File](../special/file.md#table_engines-file) and [URL](../special/url.md#table_engines-url) engines, but provides Hadoop-specific features. to the [File](../../../engines/table-engines/special/file.md#table_engines-file) and [URL](../../../engines/table-engines/special/url.md#table_engines-url) engines, but provides Hadoop-specific features.
## Usage {#usage} ## Usage {#usage}
@ -116,6 +116,6 @@ CREATE TABLE big_table (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9
**See Also** **See Also**
- [Virtual columns](../index.md#table_engines-virtual_columns) - [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns)
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/hdfs/) <!--hide--> [Original article](https://clickhouse.tech/docs/en/operations/table_engines/hdfs/) <!--hide-->

View File

@ -173,7 +173,7 @@ For a list of possible configuration options, see the [librdkafka configuration
**See Also** **See Also**
- [Virtual columns](../index.md#table_engines-virtual_columns) - [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns)
- [background_schedule_pool_size](../../../operations/settings/settings.md#background_schedule_pool_size) - [background\_schedule\_pool\_size](../../../operations/settings/settings.md#background_schedule_pool_size)
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/kafka/) <!--hide--> [Original article](https://clickhouse.tech/docs/en/operations/table_engines/kafka/) <!--hide-->

View File

@ -9,9 +9,9 @@ These engines were developed for scenarios when you need to quickly write many s
Engines of the family: Engines of the family:
- [StripeLog](stripelog.md) - [StripeLog](../../../engines/table-engines/log-family/stripelog.md)
- [Log](log.md) - [Log](../../../engines/table-engines/log-family/log.md)
- [TinyLog](tinylog.md) - [TinyLog](../../../engines/table-engines/log-family/tinylog.md)
## Common Properties {#common-properties} ## Common Properties {#common-properties}

View File

@ -5,9 +5,9 @@ toc_title: Log
# Log {#log} # Log {#log}
Engine belongs to the family of log engines. See the common properties of log engines and their differences in the [Log Engine Family](log-family.md) article. Engine belongs to the family of log engines. See the common properties of log engines and their differences in the [Log Engine Family](../../../engines/table-engines/log-family/log-family.md) article.
Log differs from [TinyLog](tinylog.md) in that a small file of “marks” resides with the column files. These marks are written on every data block and contain offsets that indicate where to start reading the file in order to skip the specified number of rows. This makes it possible to read table data in multiple threads. Log differs from [TinyLog](../../../engines/table-engines/log-family/tinylog.md) in that a small file of “marks” resides with the column files. These marks are written on every data block and contain offsets that indicate where to start reading the file in order to skip the specified number of rows. This makes it possible to read table data in multiple threads.
For concurrent data access, the read operations can be performed simultaneously, while write operations block reads and each other. For concurrent data access, the read operations can be performed simultaneously, while write operations block reads and each other.
The Log engine does not support indexes. Similarly, if writing to a table failed, the table is broken, and reading from it returns an error. The Log engine is appropriate for temporary data, write-once tables, and for testing or demonstration purposes. The Log engine does not support indexes. Similarly, if writing to a table failed, the table is broken, and reading from it returns an error. The Log engine is appropriate for temporary data, write-once tables, and for testing or demonstration purposes.

View File

@ -5,7 +5,7 @@ toc_title: StripeLog
# Stripelog {#stripelog} # Stripelog {#stripelog}
This engine belongs to the family of log engines. See the common properties of log engines and their differences in the [Log Engine Family](log-family.md) article. This engine belongs to the family of log engines. See the common properties of log engines and their differences in the [Log Engine Family](../../../engines/table-engines/log-family/log-family.md) article.
Use this engine in scenarios when you need to write many tables with a small amount of data (less than 1 million rows). Use this engine in scenarios when you need to write many tables with a small amount of data (less than 1 million rows).

View File

@ -5,10 +5,10 @@ toc_title: TinyLog
# TinyLog {#tinylog} # TinyLog {#tinylog}
The engine belongs to the log engine family. See [Log Engine Family](log-family.md) for common properties of log engines and their differences. The engine belongs to the log engine family. See [Log Engine Family](../../../engines/table-engines/log-family/log-family.md) for common properties of log engines and their differences.
This table engine is typically used with the write-once method: write data one time, then read it as many times as necessary. For example, you can use `TinyLog`-type tables for intermediary data that is processed in small batches. Note that storing data in a large number of small tables is inefficient. This table engine is typically used with the write-once method: write data one time, then read it as many times as necessary. For example, you can use `TinyLog`-type tables for intermediary data that is processed in small batches. Note that storing data in a large number of small tables is inefficient.
Queries are executed in a single stream. In other words, this engine is intended for relatively small tables (up to about 1,000,000 rows). It makes sense to use this table engine if you have many small tables, since its simpler than the [Log](log.md) engine (fewer files need to be opened). Queries are executed in a single stream. In other words, this engine is intended for relatively small tables (up to about 1,000,000 rows). It makes sense to use this table engine if you have many small tables, since its simpler than the [Log](../../../engines/table-engines/log-family/log.md) engine (fewer files need to be opened).
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/tinylog/) <!--hide--> [Original article](https://clickhouse.tech/docs/en/operations/table_engines/tinylog/) <!--hide-->

View File

@ -5,7 +5,7 @@ toc_title: AggregatingMergeTree
# Aggregatingmergetree {#aggregatingmergetree} # Aggregatingmergetree {#aggregatingmergetree}
The engine inherits from [MergeTree](mergetree.md#table_engines-mergetree), altering the logic for data parts merging. ClickHouse replaces all rows with the same primary key (or more accurately, with the same [sorting key](mergetree.md)) with a single row (within a one data part) that stores a combination of states of aggregate functions. The engine inherits from [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engines-mergetree), altering the logic for data parts merging. ClickHouse replaces all rows with the same primary key (or more accurately, with the same [sorting key](../../../engines/table-engines/mergetree-family/mergetree.md)) with a single row (within a one data part) that stores a combination of states of aggregate functions.
You can use `AggregatingMergeTree` tables for incremental data aggregation, including for aggregated materialized views. You can use `AggregatingMergeTree` tables for incremental data aggregation, including for aggregated materialized views.
@ -36,7 +36,7 @@ For a description of request parameters, see [request description](../../../sql-
**Query clauses** **Query clauses**
When creating a `AggregatingMergeTree` table the same [clauses](mergetree.md) are required, as when creating a `MergeTree` table. When creating a `AggregatingMergeTree` table the same [clauses](../../../engines/table-engines/mergetree-family/mergetree.md) are required, as when creating a `MergeTree` table.
<details markdown="1"> <details markdown="1">

View File

@ -5,7 +5,7 @@ toc_title: CollapsingMergeTree
# CollapsingMergeTree {#table_engine-collapsingmergetree} # CollapsingMergeTree {#table_engine-collapsingmergetree}
The engine inherits from [MergeTree](mergetree.md) and adds the logic of rows collapsing to data parts merge algorithm. The engine inherits from [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) and adds the logic of rows collapsing to data parts merge algorithm.
`CollapsingMergeTree` asynchronously deletes (collapses) pairs of rows if all of the fields in a sorting key (`ORDER BY`) are equivalent excepting the particular field `Sign` which can have `1` and `-1` values. Rows without a pair are kept. For more details see the [Collapsing](#table_engine-collapsingmergetree-collapsing) section of the document. `CollapsingMergeTree` asynchronously deletes (collapses) pairs of rows if all of the fields in a sorting key (`ORDER BY`) are equivalent excepting the particular field `Sign` which can have `1` and `-1` values. Rows without a pair are kept. For more details see the [Collapsing](#table_engine-collapsingmergetree-collapsing) section of the document.
@ -36,7 +36,7 @@ For a description of query parameters, see [query description](../../../sql-refe
**Query clauses** **Query clauses**
When creating a `CollapsingMergeTree` table, the same [query clauses](mergetree.md#table_engine-mergetree-creating-a-table) are required, as when creating a `MergeTree` table. When creating a `CollapsingMergeTree` table, the same [query clauses](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) are required, as when creating a `MergeTree` table.
<details markdown="1"> <details markdown="1">

View File

@ -5,11 +5,11 @@ toc_title: Custom Partitioning Key
# Custom Partitioning Key {#custom-partitioning-key} # Custom Partitioning Key {#custom-partitioning-key}
Partitioning is available for the [MergeTree](mergetree.md) family tables (including [replicated](replication.md) tables). [Materialized views](../special/materializedview.md#materializedview) based on MergeTree tables support partitioning, as well. Partitioning is available for the [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) family tables (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) tables). [Materialized views](../../../engines/table-engines/special/materializedview.md#materializedview) based on MergeTree tables support partitioning, as well.
A partition is a logical combination of records in a table by a specified criterion. You can set a partition by an arbitrary criterion, such as by month, by day, or by event type. Each partition is stored separately to simplify manipulations of this data. When accessing the data, ClickHouse uses the smallest subset of partitions possible. A partition is a logical combination of records in a table by a specified criterion. You can set a partition by an arbitrary criterion, such as by month, by day, or by event type. Each partition is stored separately to simplify manipulations of this data. When accessing the data, ClickHouse uses the smallest subset of partitions possible.
The partition is specified in the `PARTITION BY expr` clause when [creating a table](mergetree.md#table_engine-mergetree-creating-a-table). The partition key can be any expression from the table columns. For example, to specify partitioning by month, use the expression `toYYYYMM(date_column)`: The partition is specified in the `PARTITION BY expr` clause when [creating a table](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table). The partition key can be any expression from the table columns. For example, to specify partitioning by month, use the expression `toYYYYMM(date_column)`:
``` sql ``` sql
CREATE TABLE visits CREATE TABLE visits
@ -23,7 +23,7 @@ PARTITION BY toYYYYMM(VisitDate)
ORDER BY Hour; ORDER BY Hour;
``` ```
The partition key can also be a tuple of expressions (similar to the [primary key](mergetree.md#primary-keys-and-indexes-in-queries)). For example: The partition key can also be a tuple of expressions (similar to the [primary key](../../../engines/table-engines/mergetree-family/mergetree.md#primary-keys-and-indexes-in-queries)). For example:
``` sql ``` sql
ENGINE = ReplicatedCollapsingMergeTree('/clickhouse/tables/name', 'replica1', Sign) ENGINE = ReplicatedCollapsingMergeTree('/clickhouse/tables/name', 'replica1', Sign)
@ -38,7 +38,7 @@ When inserting new data to a table, this data is stored as a separate part (chun
!!! info "Info" !!! info "Info"
A merge only works for data parts that have the same value for the partitioning expression. This means **you shouldnt make overly granular partitions** (more than about a thousand partitions). Otherwise, the `SELECT` query performs poorly because of an unreasonably large number of files in the file system and open file descriptors. A merge only works for data parts that have the same value for the partitioning expression. This means **you shouldnt make overly granular partitions** (more than about a thousand partitions). Otherwise, the `SELECT` query performs poorly because of an unreasonably large number of files in the file system and open file descriptors.
Use the [system.parts](../../../operations/system-tables.md#system_tables-parts) table to view the table parts and partitions. For example, lets assume that we have a `visits` table with partitioning by month. Lets perform the `SELECT` query for the `system.parts` table: Use the [system.parts](../../../operations/system-tables/parts.md#system_tables-parts) table to view the table parts and partitions. For example, lets assume that we have a `visits` table with partitioning by month. Lets perform the `SELECT` query for the `system.parts` table:
``` sql ``` sql
SELECT SELECT

View File

@ -9,7 +9,7 @@ This engine is designed for thinning and aggregating/averaging (rollup) [Graphit
You can use any ClickHouse table engine to store the Graphite data if you dont need rollup, but if you need a rollup use `GraphiteMergeTree`. The engine reduces the volume of storage and increases the efficiency of queries from Graphite. You can use any ClickHouse table engine to store the Graphite data if you dont need rollup, but if you need a rollup use `GraphiteMergeTree`. The engine reduces the volume of storage and increases the efficiency of queries from Graphite.
The engine inherits properties from [MergeTree](mergetree.md). The engine inherits properties from [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md).
## Creating a Table {#creating-table} ## Creating a Table {#creating-table}
@ -50,7 +50,7 @@ The names of these columns should be set in the rollup configuration.
**Query clauses** **Query clauses**
When creating a `GraphiteMergeTree` table, the same [clauses](mergetree.md#table_engine-mergetree-creating-a-table) are required, as when creating a `MergeTree` table. When creating a `GraphiteMergeTree` table, the same [clauses](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) are required, as when creating a `MergeTree` table.
<details markdown="1"> <details markdown="1">

View File

@ -15,20 +15,20 @@ Main features:
This allows you to create a small sparse index that helps find data faster.
- Partitions can be used if the [partitioning key](../../../engines/table-engines/mergetree-family/custom-partitioning-key.md) is specified.
ClickHouse supports certain operations with partitions that are more effective than general operations on the same data with the same result. ClickHouse also automatically cuts off the partition data where the partitioning key is specified in the query. This also improves query performance.
- Data replication support.
The family of `ReplicatedMergeTree` tables provides data replication. For more information, see [Data replication](../../../engines/table-engines/mergetree-family/replication.md).
- Data sampling support.
If necessary, you can set the data sampling method in the table.
!!! info "Info"
The [Merge](../../../engines/table-engines/special/merge.md#merge) engine does not belong to the `*MergeTree` family.
## Creating a Table {#table_engine-mergetree-creating-a-table}
@ -51,9 +51,6 @@ ORDER BY expr
For a description of parameters, see the [CREATE query description](../../../sql-reference/statements/create.md).
!!! note "Note"
`INDEX` is an experimental feature, see [Data Skipping Indexes](#table_engine-mergetree-data_skipping-indexes).
### Query Clauses {#mergetree-query-clauses}
- `ENGINE` — Name and parameters of the engine. `ENGINE = MergeTree()`. The `MergeTree` engine does not have parameters.
@ -64,9 +61,9 @@ For a description of parameters, see the [CREATE query description](../../../sql
ClickHouse uses the sorting key as a primary key if the primary key is not defined explicitly by the `PRIMARY KEY` clause.
Use the `ORDER BY tuple()` syntax, if you don’t need sorting. See [Selecting the Primary Key](#selecting-the-primary-key).
- `PARTITION BY` — The [partitioning key](../../../engines/table-engines/mergetree-family/custom-partitioning-key.md). Optional.
For partitioning by month, use the `toYYYYMM(date_column)` expression, where `date_column` is a column with a date of the type [Date](../../../sql-reference/data-types/date.md). The partition names here have the `"YYYYMM"` format.
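For instance, a hypothetical `visits` table partitioned by month could be declared roughly like this (the column set is illustrative):
``` sql
CREATE TABLE visits
(
    VisitDate Date,
    CounterID UInt32,
    UserID UInt64,
    Duration UInt32
) ENGINE = MergeTree()
PARTITION BY toYYYYMM(VisitDate)
ORDER BY (CounterID, VisitDate)
```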
@ -196,13 +193,13 @@ The number of columns in the primary key is not explicitly limited. Depending on
ClickHouse sorts data by primary key, so the higher the consistency, the better the compression.
- Provide additional logic when merging data parts in the [CollapsingMergeTree](../../../engines/table-engines/mergetree-family/collapsingmergetree.md#table_engine-collapsingmergetree) and [SummingMergeTree](../../../engines/table-engines/mergetree-family/summingmergetree.md) engines.
In this case it makes sense to specify the *sorting key* that is different from the primary key.
A long primary key will negatively affect the insert performance and memory consumption, but extra columns in the primary key do not affect ClickHouse performance during `SELECT` queries.
You can create a table without a primary key using the `ORDER BY tuple()` syntax. In this case, ClickHouse stores data in the order of inserting. If you want to save data order when inserting data by `INSERT ... SELECT` queries, set [max\_insert\_threads = 1](../../../operations/settings/settings.md#settings-max-insert-threads).
To select data in the initial order, use [single-threaded](../../../operations/settings/settings.md#settings-max_threads) `SELECT` queries.
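A hedged sketch of that pattern; the table and column names, and the source table `events_source`, are illustrative:
``` sql
CREATE TABLE events_unsorted (EventDate Date, Payload String)
ENGINE = MergeTree()
ORDER BY tuple();

-- Preserve the source order when copying data with INSERT ... SELECT.
SET max_insert_threads = 1;
INSERT INTO events_unsorted SELECT EventDate, Payload FROM events_source;

-- Read the rows back in the stored (insertion) order.
SELECT * FROM events_unsorted SETTINGS max_threads = 1;
```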
@ -210,8 +207,8 @@ To select data in the initial order, use [single-threaded](../../../operations/s
It is possible to specify a primary key (an expression with values that are written in the index file for each mark) that is different from the sorting key (an expression for sorting the rows in data parts). In this case the primary key expression tuple must be a prefix of the sorting key expression tuple.
This feature is helpful when using the [SummingMergeTree](../../../engines/table-engines/mergetree-family/summingmergetree.md) and
[AggregatingMergeTree](../../../engines/table-engines/mergetree-family/aggregatingmergetree.md) table engines. In a common case when using these engines, the table has two types of columns: *dimensions* and *measures*. Typical queries aggregate values of measure columns with arbitrary `GROUP BY` and filtering by dimensions. Because SummingMergeTree and AggregatingMergeTree aggregate rows with the same value of the sorting key, it is natural to add all dimensions to it. As a result, the key expression consists of a long list of columns and this list must be frequently updated with newly added dimensions.
In this case it makes sense to leave only a few columns in the primary key that will provide efficient range scans and add the remaining dimension columns to the sorting key tuple.
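A hedged illustration of a primary key that is a prefix of a longer sorting key (all names are hypothetical):
``` sql
CREATE TABLE site_counters
(
    CounterID UInt32,
    EventDate Date,
    Region String,
    Device String,
    Hits UInt64
) ENGINE = SummingMergeTree()
ORDER BY (CounterID, EventDate, Region, Device)
PRIMARY KEY (CounterID, EventDate)
```
Here `Region` and `Device` are extra dimensions that take part in aggregation during merges but are not written to the sparse index.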
@ -257,7 +254,7 @@ ClickHouse cannot use an index if the values of the primary key in the query par
ClickHouse uses this logic not only for days of the month sequences, but for any primary key that represents a partially-monotonic sequence.
### Data Skipping Indexes {#table_engine-mergetree-data_skipping-indexes}
The index declaration is in the columns section of the `CREATE` query.
@ -502,7 +499,7 @@ Data part is the minimum movable unit for `MergeTree`-engine tables. The data be
- Volume — Ordered set of equal disks (similar to [JBOD](https://en.wikipedia.org/wiki/Non-RAID_drive_architectures)).
- Storage policy — Set of volumes and the rules for moving data between them.
The names given to the described entities can be found in the system tables, [system.storage\_policies](../../../operations/system-tables/storage_policies.md#system_tables-storage_policies) and [system.disks](../../../operations/system-tables/disks.md#system_tables-disks). To apply one of the configured storage policies for a table, use the `storage_policy` setting of `MergeTree`-engine family tables.
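For example, assuming a policy named `moving_from_ssd_to_hdd` has been configured as shown in the configuration section that follows, it could be inspected and applied roughly like this (the table is illustrative):
``` sql
SELECT policy_name, volume_name, disks FROM system.storage_policies;
SELECT name, path, free_space, total_space FROM system.disks;

CREATE TABLE hits_tiered (EventDate Date, URL String)
ENGINE = MergeTree()
ORDER BY EventDate
SETTINGS storage_policy = 'moving_from_ssd_to_hdd';
```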
### Configuration {#table_engine-mergetree-multiple-volumes_configure}
@ -632,7 +629,7 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd'
The `default` storage policy implies using only one volume, which consists of only one disk given in `<path>`. Once a table is created, its storage policy cannot be changed.
The number of threads performing background moves of data parts can be changed by [background\_move\_pool\_size](../../../operations/settings/settings.md#background_move_pool_size) setting.
### Details {#details}
@ -651,7 +648,7 @@ In all these cases except for mutations and partition freezing, a part is stored
Under the hood, mutations and partition freezing make use of [hard links](https://en.wikipedia.org/wiki/Hard_link). Hard links between different disks are not supported, therefore in such cases the resulting parts are stored on the same disks as the initial ones.
In the background, parts are moved between volumes on the basis of the amount of free space (`move_factor` parameter) according to the order the volumes are declared in the configuration file.
Data is never transferred from the last one and into the first one. One may use system tables [system.part\_log](../../../operations/system-tables/part_log.md#system_tables-part-log) (field `type = MOVE_PART`) and [system.parts](../../../operations/system-tables/parts.md#system_tables-parts) (fields `path` and `disk`) to monitor background moves. Also, the detailed information can be found in server logs.
User can force moving a part or a partition from one volume to another using the query [ALTER TABLE … MOVE PART\|PARTITION … TO VOLUME\|DISK …](../../../sql-reference/statements/alter.md#alter_move-partition), all the restrictions for background operations are taken into account. The query initiates a move on its own and does not wait for background operations to be completed. User will get an error message if not enough free space is available or if any of the required conditions are not met.
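A hedged sketch of a manual move and of the monitoring queries mentioned above; the table, partition and volume names are illustrative, and the column names reflect recent versions and may differ:
``` sql
ALTER TABLE visits MOVE PARTITION 201902 TO VOLUME 'external';

-- Where do the active parts live now?
SELECT name, path FROM system.parts WHERE table = 'visits' AND active;

-- Which parts were moved in the background? (the docs refer to this field as type = MOVE_PART)
SELECT event_time, part_name, path_on_disk
FROM system.part_log
WHERE event_type = 'MovePart' AND table = 'visits';
```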
@ -5,7 +5,7 @@ toc_title: ReplacingMergeTree
# ReplacingMergeTree {#replacingmergetree}
The engine differs from [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engines-mergetree) in that it removes duplicate entries with the same [sorting key](../../../engines/table-engines/mergetree-family/mergetree.md) value.
Data deduplication occurs only during a merge. Merging occurs in the background at an unknown time, so you can’t plan for it. Some of the data may remain unprocessed. Although you can run an unscheduled merge using the `OPTIMIZE` query, don’t count on using it, because the `OPTIMIZE` query will read and write a large amount of data.
@ -33,14 +33,14 @@ For a description of request parameters, see [request description](../../../sql-
- `ver` — column with version. Type `UInt*`, `Date` or `DateTime`. Optional parameter.
When merging, `ReplacingMergeTree` leaves only one row out of all the rows with the same sorting key:
- Last in the selection, if `ver` not set.
- With the maximum version, if `ver` specified.
**Query clauses**
When creating a `ReplacingMergeTree` table the same [clauses](../../../engines/table-engines/mergetree-family/mergetree.md) are required, as when creating a `MergeTree` table.
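A minimal sketch with hypothetical column names, where `Version` plays the role of `ver`:
``` sql
CREATE TABLE user_state
(
    UserID UInt64,
    Attribute String,
    Version UInt32
) ENGINE = ReplacingMergeTree(Version)
ORDER BY UserID
```
Rows with equal `UserID` are collapsed to the row with the highest `Version` during background merges.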
<details markdown="1">
@ -63,7 +63,7 @@ For each `INSERT` query, approximately ten entries are added to ZooKeeper throug
For very large clusters, you can use different ZooKeeper clusters for different shards. However, this hasn’t proven necessary on the Yandex.Metrica cluster (approximately 300 servers).
Replication is asynchronous and multi-master. `INSERT` queries (as well as `ALTER`) can be sent to any available server. Data is inserted on the server where the query is run, and then it is copied to the other servers. Because it is asynchronous, recently inserted data appears on the other replicas with some latency. If part of the replicas are not available, the data is written when they become available. If a replica is available, the latency is the amount of time it takes to transfer the block of compressed data over the network. The number of threads performing background tasks for replicated tables can be set by [background\_schedule\_pool\_size](../../../operations/settings/settings.md#background_schedule_pool_size) setting.
By default, an INSERT query waits for confirmation of writing the data from only one replica. If the data was successfully written to only one replica and the server with this replica ceases to exist, the stored data will be lost. To enable getting confirmation of data writes from multiple replicas, use the `insert_quorum` option.
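A hedged sketch of a replicated table and a quorum insert; the ZooKeeper path, the macros `{shard}`/`{replica}`, and the table itself are assumed to be configured on the servers:
``` sql
CREATE TABLE hits_replicated
(
    EventDate Date,
    CounterID UInt32,
    UserID UInt64
) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{shard}/hits_replicated', '{replica}')
PARTITION BY toYYYYMM(EventDate)
ORDER BY (CounterID, EventDate);

-- Wait for acknowledgement from two replicas before confirming the INSERT.
SET insert_quorum = 2;
INSERT INTO hits_replicated VALUES ('2020-06-24', 1, 42);
```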
@ -217,6 +217,6 @@ If the data in ZooKeeper was lost or damaged, you can save data by moving it to
**See also**
- [background\_schedule\_pool\_size](../../../operations/settings/settings.md#background_schedule_pool_size)
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/replication/) <!--hide-->
@ -5,7 +5,7 @@ toc_title: SummingMergeTree
# SummingMergeTree {#summingmergetree}
The engine inherits from [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engines-mergetree). The difference is that when merging data parts for `SummingMergeTree` tables ClickHouse replaces all the rows with the same primary key (or more accurately, with the same [sorting key](../../../engines/table-engines/mergetree-family/mergetree.md)) with one row which contains summarized values for the columns with the numeric data type. If the sorting key is composed in a way that a single key value corresponds to a large number of rows, this significantly reduces storage volume and speeds up data selection.
We recommend using the engine together with `MergeTree`. Store complete data in a `MergeTree` table, and use `SummingMergeTree` for storing aggregated data, for example, when preparing reports. Such an approach will prevent you from losing valuable data due to an incorrectly composed primary key.
@ -35,7 +35,7 @@ For a description of request parameters, see [request description](../../../sql-
**Query clauses**
When creating a `SummingMergeTree` table the same [clauses](../../../engines/table-engines/mergetree-family/mergetree.md) are required, as when creating a `MergeTree` table.
<details markdown="1">
@ -96,7 +96,7 @@ SELECT key, sum(value) FROM summtt GROUP BY key
When data are inserted into a table, they are saved as-is. ClickHouse merges the inserted parts of data periodically and this is when rows with the same primary key are summed and replaced with one for each resulting part of data.
ClickHouse can merge the data parts so that different resulting parts of data can consist of rows with the same primary key, i.e. the summation will be incomplete. Therefore, an aggregate function [sum()](../../../sql-reference/aggregate-functions/reference/sum.md#agg_function-sum) and a `GROUP BY` clause should be used in a (`SELECT`) query as described in the example above.
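A compact sketch of that pattern, using the `summtt` table from the example the text refers to:
``` sql
CREATE TABLE summtt (key UInt32, value UInt32)
ENGINE = SummingMergeTree()
ORDER BY key;

INSERT INTO summtt VALUES (1, 1), (1, 2), (2, 1);

-- Parts may not be fully merged yet, so finish the summation at query time.
SELECT key, sum(value) FROM summtt GROUP BY key;
```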
### Common Rules for Summation {#common-rules-for-summation}
@ -110,7 +110,7 @@ The values are not summarized for columns in the primary key.
### The Summation in the Aggregatefunction Columns {#the-summation-in-the-aggregatefunction-columns}
For columns of [AggregateFunction type](../../../sql-reference/data-types/aggregatefunction.md) ClickHouse behaves as [AggregatingMergeTree](../../../engines/table-engines/mergetree-family/aggregatingmergetree.md) engine aggregating according to the function.
### Nested Structures {#nested-structures}
@ -132,7 +132,7 @@ Examples:
[(1, 100), (2, 150)] + [(1, -100)] -> [(2, 150)]
```
When requesting data, use the [sumMap(key, value)](../../../sql-reference/aggregate-functions/reference/summap.md) function for aggregation of `Map`.
For nested data structure, you do not need to specify its columns in the tuple of columns for summation.
@ -12,7 +12,7 @@ This engine:
See the section [Collapsing](#table_engines_versionedcollapsingmergetree) for details.
The engine inherits from [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engines-mergetree) and adds the logic for collapsing rows to the algorithm for merging data parts. `VersionedCollapsingMergeTree` serves the same purpose as [CollapsingMergeTree](../../../engines/table-engines/mergetree-family/collapsingmergetree.md) but uses a different collapsing algorithm that allows inserting the data in any order with multiple threads. In particular, the `Version` column helps to collapse the rows properly even if they are inserted in the wrong order. In contrast, `CollapsingMergeTree` allows only strictly consecutive insertion.
## Creating a Table {#creating-a-table}
@ -47,7 +47,7 @@ VersionedCollapsingMergeTree(sign, version)
**Query Clauses**
When creating a `VersionedCollapsingMergeTree` table, the same [clauses](../../../engines/table-engines/mergetree-family/mergetree.md) are required as when creating a `MergeTree` table.
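A minimal sketch with hypothetical columns; `Sign` and `Version` correspond to the `sign` and `version` engine parameters:
``` sql
CREATE TABLE user_actions
(
    UserID UInt64,
    PageViews UInt8,
    Duration UInt8,
    Sign Int8,
    Version UInt8
) ENGINE = VersionedCollapsingMergeTree(Sign, Version)
ORDER BY UserID
```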
<details markdown="1">
@ -7,7 +7,7 @@ toc_title: Dictionary
The `Dictionary` engine displays the [dictionary](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) data as a ClickHouse table.
## Example {#example}
As an example, consider a dictionary of `products` with the following configuration:
@ -10,7 +10,7 @@ Reading is automatically parallelized. During a read, the table indexes on remot
The Distributed engine accepts parameters:
- the cluster name in the server’s config file
- the name of a remote database
@ -23,7 +23,7 @@ The Distributed engine accepts parameters:
See also:
- `insert_distributed_sync` setting
- [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) for the examples
Example:
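A hedged sketch, assuming a cluster named `logs` is configured and a local table `default.hits` exists on every shard:
``` sql
CREATE TABLE hits_all AS default.hits
ENGINE = Distributed(logs, default, hits, rand())
```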
@ -37,7 +37,7 @@ For example, for a query with GROUP BY, data will be aggregated on remote server
Instead of the database name, you can use a constant expression that returns a string. For example: currentDatabase().
logs The cluster name in the server’s config file.
Clusters are set like this:
@ -82,7 +82,7 @@ Replicas are duplicating servers (in order to read all the data, you can access
Cluster names must not contain dots.
The parameters `host`, `port`, and optionally `user`, `password`, `secure`, `compression` are specified for each server:
- `host` The address of the remote server. You can use either the domain or the IPv4 or IPv6 address. If you specify the domain, the server makes a DNS request when it starts, and the result is stored as long as the server is running. If the DNS request fails, the server doesn’t start. If you change the DNS record, restart the server.
- `port` The TCP port for messenger activity (`tcp_port` in the config, usually set to 9000). Do not confuse it with http\_port.
- `user` Name of the user for connecting to a remote server. Default value: default. This user must have access to connect to the specified server. Access is configured in the users.xml file. For more information, see the section [Access rights](../../../operations/access-rights.md).
- `password` The password for connecting to a remote server (not masked). Default value: empty string.
@ -99,38 +99,38 @@ You can specify as many clusters as you wish in the configuration.
To view your clusters, use the `system.clusters` table.
The Distributed engine allows working with a cluster like a local server. However, the cluster is inextensible: you must write its configuration in the server config file (even better, for all the cluster’s servers).
The Distributed engine requires writing clusters to the config file. Clusters from the config file are updated on the fly, without restarting the server. If you need to send a query to an unknown set of shards and replicas each time, you don’t need to create a Distributed table; use the `remote` table function instead. See the section [Table functions](../../../sql-reference/table-functions/index.md).
There are two methods for writing data to a cluster:
First, you can define which servers to write which data to and perform the write directly on each shard. In other words, perform INSERT in the tables that the distributed table “looks at”. This is the most flexible solution as you can use any sharding scheme, which could be non-trivial due to the requirements of the subject area. This is also the most optimal solution since data can be written to different shards completely independently.
Second, you can perform INSERT in a Distributed table. In this case, the table will distribute the inserted data across the servers itself. In order to write to a Distributed table, it must have a sharding key set (the last parameter). In addition, if there is only one shard, the write operation works without specifying the sharding key, since it doesn’t mean anything in this case.
Each shard can have a weight defined in the config file. By default, the weight is equal to one. Data is distributed across shards in the amount proportional to the shard weight. For example, if there are two shards and the first has a weight of 9 while the second has a weight of 10, the first will be sent 9 / 19 parts of the rows, and the second will be sent 10 / 19.
Each shard can have the `internal_replication` parameter defined in the config file.
If this parameter is set to `true`, the write operation selects the first healthy replica and writes data to it. Use this alternative if the Distributed table “looks at” replicated tables. In other words, if the table where data will be written is going to replicate them itself.
If it is set to `false` (the default), data is written to all replicas. In essence, this means that the Distributed table replicates data itself. This is worse than using replicated tables, because the consistency of replicas is not checked, and over time they will contain slightly different data.
To select the shard that a row of data is sent to, the sharding expression is analyzed, and its remainder is taken from dividing it by the total weight of the shards. The row is sent to the shard that corresponds to the half-interval of the remainders from `prev_weight` to `prev_weights + weight`, where `prev_weights` is the total weight of the shards with the smallest number, and `weight` is the weight of this shard. For example, if there are two shards, and the first has a weight of 9 while the second has a weight of 10, the row will be sent to the first shard for the remainders from the range \[0, 9), and to the second for the remainders from the range \[9, 19).
The sharding expression can be any expression from constants and table columns that returns an integer. For example, you can use the expression `rand()` for random distribution of data, or `UserID` for distribution by the remainder from dividing the user’s ID (then the data of a single user will reside on a single shard, which simplifies running IN and JOIN by users). If one of the columns is not distributed evenly enough, you can wrap it in a hash function: intHash64(UserID).
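A hedged sketch of a Distributed table that shards by a hash of the user ID, as suggested above; the cluster `logs` and the local table `default.hits` are assumed:
``` sql
CREATE TABLE hits_by_user AS default.hits
ENGINE = Distributed(logs, default, hits, intHash64(UserID))
```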
A simple remainder from the division is a limited solution for sharding and isn’t always appropriate. It works for medium and large volumes of data (dozens of servers), but not for very large volumes of data (hundreds of servers or more). In the latter case, use the sharding scheme required by the subject area, rather than using entries in Distributed tables.
SELECT queries are sent to all the shards and work regardless of how data is distributed across the shards (they can be distributed completely randomly). When you add a new shard, you don’t have to transfer the old data to it. You can write new data with a heavier weight; the data will be distributed slightly unevenly, but queries will work correctly and efficiently.
You should be concerned about the sharding scheme in the following cases:
- Queries are used that require joining data (IN or JOIN) by a specific key. If data is sharded by this key, you can use local IN or JOIN instead of GLOBAL IN or GLOBAL JOIN, which is much more efficient.
- A large number of servers is used (hundreds or more) with a large number of small queries (queries of individual clients - websites, advertisers, or partners). In order for the small queries to not affect the entire cluster, it makes sense to locate data for a single client on a single shard. Alternatively, as we’ve done in Yandex.Metrica, you can set up bi-level sharding: divide the entire cluster into “layers”, where a layer may consist of multiple shards. Data for a single client is located on a single layer, but shards can be added to a layer as necessary, and data is randomly distributed within them. Distributed tables are created for each layer, and a single shared distributed table is created for global queries.
Data is written asynchronously. When inserted in the table, the data block is just written to the local file system. The data is sent to the remote servers in the background as soon as possible. The period for sending data is managed by the [distributed\_directory\_monitor\_sleep\_time\_ms](../../../operations/settings/settings.md#distributed_directory_monitor_sleep_time_ms) and [distributed\_directory\_monitor\_max\_sleep\_time\_ms](../../../operations/settings/settings.md#distributed_directory_monitor_max_sleep_time_ms) settings. The `Distributed` engine sends each file with inserted data separately, but you can enable batch sending of files with the [distributed\_directory\_monitor\_batch\_inserts](../../../operations/settings/settings.md#distributed_directory_monitor_batch_inserts) setting. This setting improves cluster performance by better utilizing local server and network resources. You should check whether data is sent successfully by checking the list of files (data waiting to be sent) in the table directory: `/var/lib/clickhouse/data/database/table/`. The number of threads performing background tasks can be set by [background\_distributed\_schedule\_pool\_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size) setting.
If the server ceased to exist or had a rough restart (for example, after a device failure) after an INSERT to a Distributed table, the inserted data might be lost. If a damaged data part is detected in the table directory, it is transferred to the `broken` subdirectory and no longer used.
@ -145,7 +145,7 @@ When the `max_parallel_replicas` option is enabled, query processing is parallel
**See Also**
- [Virtual columns](../../../engines/table-engines/special/index.md#table_engines-virtual_columns)
- [background\_distributed\_schedule\_pool\_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size)
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/distributed/) <!--hide-->
@ -33,7 +33,7 @@ You may manually create this subfolder and file in server filesystem and then [A
!!! warning "Warning"
Be careful with this functionality, because ClickHouse does not keep track of external changes to such files. The result of simultaneous writes via ClickHouse and outside of ClickHouse is undefined.
## Example {#example}
**1.** Set up the `file_engine_table` table:
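A minimal sketch of such a table; the column set and the `TabSeparated` format are illustrative choices:
``` sql
CREATE TABLE file_engine_table (name String, value UInt32)
ENGINE = File(TabSeparated)
```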
@ -25,7 +25,7 @@ Generate table engine supports only `SELECT` queries.
It supports all [DataTypes](../../../sql-reference/data-types/index.md) that can be stored in a table except `LowCardinality` and `AggregateFunction`.
## Example {#example}
**1.** Set up the `generate_engine_table` table:
@ -11,7 +11,7 @@ Reading is automatically parallelized. Writing to a table is not supported. When
The `Merge` engine accepts parameters: the database name and a regular expression for tables.
## Examples {#examples}
Example 1:
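A hedged sketch, assuming underlying tables such as `WatchLog_old` and `WatchLog_new` with the listed columns already exist in the current database:
``` sql
CREATE TABLE WatchLog_all (date Date, UserId Int64, EventType String, Cnt UInt64)
ENGINE = Merge(currentDatabase(), '^WatchLog_')
```
Reads from `WatchLog_all` fan out to every table whose name matches the regular expression.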
@ -67,6 +67,6 @@ FROM WatchLog
**See Also**
- [Virtual columns](../../../engines/table-engines/special/index.md#table_engines-virtual_columns)
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/merge/) <!--hide-->
@ -5,7 +5,7 @@ toc_title: URL
# URL Table Engine {#table_engines-url}
Queries data to/from a remote HTTP/HTTPS server. This engine is similar to the [File](../../../engines/table-engines/special/file.md) engine.
Syntax: `URL(URL, Format)`
@ -25,7 +25,7 @@ respectively. For processing `POST` requests, the remote server must support
You can limit the maximum number of HTTP GET redirect hops using the [max\_http\_get\_redirects](../../../operations/settings/settings.md#setting-max_http_get_redirects) setting.
## Example {#example}
**1.** Create a `url_engine_table` table on the server:
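A plausible sketch; the address, columns and format are illustrative:
``` sql
CREATE TABLE url_engine_table (word String, value UInt64)
ENGINE = URL('http://127.0.0.1:12345/', CSV)
```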
@ -1,58 +0,0 @@
---
toc_priority: 78
toc_title: General Questions
---
# General Questions {#general-questions}
## Why Not Use Something Like MapReduce? {#why-not-use-something-like-mapreduce}
We can refer to systems like MapReduce as distributed computing systems in which the reduce operation is based on distributed sorting. The most common open-source solution in this class is [Apache Hadoop](http://hadoop.apache.org). Yandex uses its in-house solution, YT.
These systems arent appropriate for online queries due to their high latency. In other words, they cant be used as the back-end for a web interface. These types of systems arent useful for real-time data updates. Distributed sorting isnt the best way to perform reduce operations if the result of the operation and all the intermediate results (if there are any) are located in the RAM of a single server, which is usually the case for online queries. In such a case, a hash table is an optimal way to perform reduce operations. A common approach to optimizing map-reduce tasks is pre-aggregation (partial reduce) using a hash table in RAM. The user performs this optimization manually. Distributed sorting is one of the main causes of reduced performance when running simple map-reduce tasks.
Most MapReduce implementations allow you to execute arbitrary code on a cluster. But a declarative query language is better suited to OLAP to run experiments quickly. For example, Hadoop has Hive and Pig. Also consider Cloudera Impala or Shark (outdated) for Spark, as well as Spark SQL, Presto, and Apache Drill. Performance when running such tasks is highly sub-optimal compared to specialized systems, but relatively high latency makes it unrealistic to use these systems as the backend for a web interface.
## What If I Have a Problem with Encodings When Using Oracle Through ODBC? {#oracle-odbc-encodings}
If you use Oracle through the ODBC driver as a source of external dictionaries, you need to set the correct value for the `NLS_LANG` environment variable in `/etc/default/clickhouse`. For more information, see the [Oracle NLS\_LANG FAQ](https://www.oracle.com/technetwork/products/globalization/nls-lang-099431.html).
**Example**
``` bash
NLS_LANG=RUSSIAN_RUSSIA.UTF8
```
## How Do I Export Data from ClickHouse to a File? {#how-to-export-to-file}
### Using INTO OUTFILE Clause {#using-into-outfile-clause}
Add an [INTO OUTFILE](../sql-reference/statements/select/into-outfile.md#into-outfile-clause) clause to your query.
For example:
``` sql
SELECT * FROM table INTO OUTFILE 'file'
```
By default, ClickHouse uses the [TabSeparated](../interfaces/formats.md#tabseparated) format for output data. To select the [data format](../interfaces/formats.md), use the [FORMAT clause](../sql-reference/statements/select/format.md#format-clause).
For example:
``` sql
SELECT * FROM table INTO OUTFILE 'file' FORMAT CSV
```
### Using a File-Engine Table {#using-a-file-engine-table}
See [File](../engines/table-engines/special/file.md).
### Using Command-Line Redirection {#using-command-line-redirection}
``` bash
$ clickhouse-client --query "SELECT * from table" --format FormatName > result.txt
```
See [clickhouse-client](../interfaces/cli.md).
{## [Original article](https://clickhouse.tech/docs/en/faq/general/) ##}
@ -0,0 +1,25 @@
---
title: What is a columnar database?
toc_hidden: true
toc_priority: 101
---
# What Is a Columnar Database? {#what-is-a-columnar-database}
A columnar database stores data of each column independently. This allows reading data from disk only for those columns that are used in any given query. The cost is that operations that affect whole rows become proportionally more expensive. A synonym for a columnar database is a column-oriented database management system. ClickHouse is a typical example of such a system.
Key columnar database advantages are:
- Queries that use only a few columns out of many.
- Aggregating queries against large volumes of data.
- Column-wise data compression.
Here is the illustration of the difference between traditional row-oriented systems and columnar databases when building reports:
**Traditional row-oriented**
![Traditional row-oriented](https://clickhouse.tech/docs/en/images/row-oriented.gif#)
**Columnar**
![Columnar](https://clickhouse.tech/docs/en/images/column-oriented.gif#)
A columnar database is a preferred choice for analytical applications because it allows having many columns in a table just in case, without paying the cost for unused columns at read query execution time. Column-oriented databases are designed for big data processing and data warehousing because they often natively scale using distributed clusters of low-cost hardware to increase throughput. ClickHouse does it with a combination of [distributed](../../engines/table-engines/special/distributed.md) and [replicated](../../engines/table-engines/mergetree-family/replication.md) tables.
@ -0,0 +1,17 @@
---
title: "What does \u201CClickHouse\u201D mean?"
toc_hidden: true
toc_priority: 10
---
# What Does “ClickHouse” Mean? {#what-does-clickhouse-mean}
Its a combination of “**Click**stream” and “Data ware**House**”. It comes from the original use case at Yandex.Metrica, where ClickHouse was supposed to keep records of all clicks by people from all over the Internet and it still does the job. You can read more about this use case on [ClickHouse history](../../introduction/history.md) page.
This two-part meaning has two consequences:
- The only correct way to write Click**H**ouse is with capital H.
- If you need to abbreviate it, use **CH**. For some historical reasons, abbreviating as CK is also popular in China, mostly because one of the first talks about ClickHouse in Chinese used this form.
!!! info "Fun fact"
Many years after ClickHouse got its name, this approach of combining two words that are meaningful on their own has been highlighted as the best way to name a database in a [research by Andy Pavlo](https://www.cs.cmu.edu/~pavlo/blog/2020/03/on-naming-a-database-management-system.html), an Associate Professor of Databases at Carnegie Mellon University. ClickHouse shared his “best database name of all time” award with Postgres.
@ -0,0 +1,24 @@
---
title: General questions about ClickHouse
toc_hidden_folder: true
toc_priority: 1
toc_title: General
---
# General Questions About ClickHouse {#general-questions}
Questions:
- [What is ClickHouse?](../../index.md#what-is-clickhouse)
- [Why ClickHouse is so fast?](../../faq/general/why-clickhouse-is-so-fast.md)
- [Who is using ClickHouse?](../../faq/general/who-is-using-clickhouse.md)
- [What does “ClickHouse” mean?](../../faq/general/dbms-naming.md)
- [What does “Не тормозит” mean?](../../faq/general/ne-tormozit.md)
- [What is OLAP?](../../faq/general/olap.md)
- [What is a columnar database?](../../faq/general/columnar-database.md)
- [Why not use something like MapReduce?](../../faq/general/mapreduce.md)
!!! info "Dont see what you were looking for?"
Check out [other F.A.Q. categories](../../faq/index.md) or browse around main documentation articles found in the left sidebar.
{## [Original article](https://clickhouse.tech/docs/en/faq/general/) ##}
@ -0,0 +1,13 @@
---
title: Why not use something like MapReduce?
toc_hidden: true
toc_priority: 110
---
# Why Not Use Something Like MapReduce? {#why-not-use-something-like-mapreduce}
We can refer to systems like MapReduce as distributed computing systems in which the reduce operation is based on distributed sorting. The most common open-source solution in this class is [Apache Hadoop](http://hadoop.apache.org). Yandex uses its in-house solution, YT.
These systems arent appropriate for online queries due to their high latency. In other words, they cant be used as the back-end for a web interface. These types of systems arent useful for real-time data updates. Distributed sorting isnt the best way to perform reduce operations if the result of the operation and all the intermediate results (if there are any) are located in the RAM of a single server, which is usually the case for online queries. In such a case, a hash table is an optimal way to perform reduce operations. A common approach to optimizing map-reduce tasks is pre-aggregation (partial reduce) using a hash table in RAM. The user performs this optimization manually. Distributed sorting is one of the main causes of reduced performance when running simple map-reduce tasks.
Most MapReduce implementations allow you to execute arbitrary code on a cluster. But a declarative query language is better suited to OLAP to run experiments quickly. For example, Hadoop has Hive and Pig. Also consider Cloudera Impala or Shark (outdated) for Spark, as well as Spark SQL, Presto, and Apache Drill. Performance when running such tasks is highly sub-optimal compared to specialized systems, but relatively high latency makes it unrealistic to use these systems as the backend for a web interface.
@ -0,0 +1,26 @@
---
title: "What does \u201C\u043D\u0435 \u0442\u043E\u0440\u043C\u043E\u0437\u0438\u0442\
\u201D mean?"
toc_hidden: true
toc_priority: 11
---
# What Does “Не тормозит” Mean? {#what-does-ne-tormozit-mean}
This question usually arises when people see official ClickHouse t-shirts. They have large words **“ClickHouse не тормозит”** on the front.
Before ClickHouse became open-source, it had been developed as an in-house storage system by the largest Russian IT company, [Yandex](https://yandex.com/company/). That’s why it initially got its slogan in Russian, which is “не тормозит” (pronounced as “ne tormozit”). After the open-source release we first produced some of those t-shirts for events in Russia and it was a no-brainer to use the slogan as-is.
One of the following batches of those t-shirts was supposed to be given away at events outside of Russia and we tried to make the English version of the slogan. Unfortunately, the Russian language is kind of elegant in terms of expressing stuff and there was a restriction of limited space on a t-shirt, so we failed to come up with a good enough translation (most options appeared to be either long or inaccurate) and decided to keep the slogan in Russian even on t-shirts produced for international events. It appeared to be a great decision because people all over the world get positively surprised and curious when they see it.
So, what does it mean? Here are some ways to translate *“не тормозит”*:
- If you translate it literally, it'd be something like *“ClickHouse doesn't press the brake pedal”*.
- If you want to express it as close as possible to how it sounds to a Russian person with an IT background, it'd be something like *“If your larger system lags, it's not because it uses ClickHouse”*.
- Shorter, but not so precise versions could be *“ClickHouse is not slow”*, *“ClickHouse doesn't lag”* or just *“ClickHouse is fast”*.
If you haven't seen one of those t-shirts in person, you can check them out online in many ClickHouse-related videos. For example, this one:
![iframe](https://www.youtube.com/embed/bSyQahMVZ7w)
P.S. These t-shirts are not for sale; they are given away for free at most [ClickHouse Meetups](https://clickhouse.tech/#meet), usually for the best questions or other forms of active participation.

View File

@ -0,0 +1,39 @@
---
title: What is OLAP?
toc_hidden: true
toc_priority: 100
---
# What Is OLAP? {#what-is-olap}
[OLAP](https://en.wikipedia.org/wiki/Online_analytical_processing) stands for Online Analytical Processing. It is a broad term that can be looked at from two perspectives: technical and business. But at the very high level, you can just read these words backward:
Processing
: Some source data is processed…
Analytical
: …to produce some analytical reports and insights…
Online
: …in real-time.
## OLAP from the Business Perspective {#olap-from-the-business-perspective}
In recent years, business people have started to realize the value of data. Companies that make their decisions blindly more often than not fail to keep up with the competition. The data-driven approach of successful companies forces them to collect all data that might be remotely useful for making business decisions, and it requires mechanisms to analyze that data in a timely manner. Here's where OLAP database management systems (DBMS) come in.
In a business sense, OLAP allows companies to continuously plan, analyze, and report operational activities, thus maximizing efficiency, reducing expenses, and ultimately conquering market share. It could be done either in an in-house system or outsourced to SaaS providers like web/mobile analytics services, CRM services, etc. OLAP is the technology behind many BI applications (Business Intelligence).
ClickHouse is an OLAP database management system that is pretty often used as a backend for those SaaS solutions for analyzing domain-specific data. However, some businesses are still reluctant to share their data with third-party providers, so an in-house data warehouse scenario is also viable.
## OLAP from the Technical Perspective {#olap-from-the-technical-perspective}
All database management systems can be classified into two groups: OLAP (Online **Analytical** Processing) and OLTP (Online **Transactional** Processing). The former focuses on building reports, each based on large volumes of historical data, but doing it not so frequently, while the latter usually handles a continuous stream of transactions, constantly modifying the current state of data.
In practice, OLAP and OLTP are not strict categories; it's more like a spectrum. Most real systems usually focus on one of them but provide some solutions or workarounds if the opposite kind of workload is also desired. This situation often forces businesses to operate multiple storage systems integrated with each other, which might not be such a big deal, but having more systems makes maintenance more expensive. So the trend of recent years is HTAP (**Hybrid Transactional/Analytical Processing**), when both kinds of workload are handled equally well by a single database management system.
Even if a DBMS started as pure OLAP or pure OLTP, it is forced to move in the HTAP direction to keep up with the competition. ClickHouse is no exception: initially, it was designed as a [fast-as-possible OLAP system](../../faq/general/why-clickhouse-is-so-fast.md) and it still doesn't have full-fledged transaction support, but some features like consistent reads/writes and mutations for updating/deleting data had to be added.
The fundamental trade-off between OLAP and OLTP systems remains:
- To build analytical reports efficiently, it's crucial to be able to read columns separately; thus most OLAP databases are [columnar](../../faq/general/columnar-database.md).
- Storing columns separately increases the cost of operations on rows, like appends or in-place modifications, proportionally to the number of columns (which can be huge if the systems try to collect all details of an event just in case). Thus, most OLTP systems store data arranged by rows.

View File

@ -0,0 +1,19 @@
---
title: Who is using ClickHouse?
toc_hidden: true
toc_priority: 9
---
# Who Is Using ClickHouse? {#who-is-using-clickhouse}
Being an open-source product makes this question not so straightforward to answer. You don't have to tell anyone if you want to start using ClickHouse; you just go grab the source code or pre-compiled packages. There's no contract to sign, and the [Apache 2.0 license](https://github.com/ClickHouse/ClickHouse/blob/master/LICENSE) allows for unconstrained software distribution.
Also, the technology stack is often in a grey zone of what's covered by an NDA. Some companies consider the technologies they use as a competitive advantage even if they are open-source and don't allow employees to share any details publicly. Some see PR risks and allow employees to share implementation details only with their PR department's approval.
So how to tell who is using ClickHouse?
One way is to **ask around**. If it's not in writing, people are much more willing to share what technologies are used in their companies, what the use cases are, what kind of hardware is used, data volumes, etc. We're talking with users regularly at [ClickHouse Meetups](https://www.youtube.com/channel/UChtmrD-dsdpspr42P_PyRAw/playlists) all over the world and have heard stories about 1000+ companies that use ClickHouse. Unfortunately, that's not reproducible, and we try to treat such stories as if they were told under NDA to avoid any potential troubles. But you can come to any of our future meetups and talk with other users on your own. There are multiple ways meetups are announced; for example, you can subscribe to [our Twitter](http://twitter.com/ClickHouseDB/).
The second way is to look for companies **publicly saying** that they use ClickHouse. It's more substantial because there's usually some hard evidence like a blog post, talk video recording, slide deck, etc. We collect links to such evidence on our **[Adopters](../../introduction/adopters.md)** page. Feel free to contribute the story of your employer or just some links you've stumbled upon (but try not to violate your NDA in the process).
You can find names of very large companies in the adopters list, like Bloomberg, Cisco, China Telecom, Tencent, or Uber, but with the first approach, we found that there are many more. For example, if you take [the list of largest IT companies by Forbes (2020)](https://www.forbes.com/sites/hanktucker/2020/05/13/worlds-largest-technology-companies-2020-apple-stays-on-top-zoom-and-uber-debut/) over half of them are using ClickHouse in some way. Also, it would be unfair not to mention [Yandex](../../introduction/history.md), the company which initially open-sourced ClickHouse in 2016 and happens to be one of the largest IT companies in Europe.

View File

@ -0,0 +1,63 @@
---
title: Why ClickHouse is so fast?
toc_hidden: true
toc_priority: 8
---
# Why ClickHouse Is So Fast? {#why-clickhouse-is-so-fast}
It was designed to be fast. Query execution performance has always been a top priority during the development process, but other important characteristics like user-friendliness, scalability, and security were also considered so ClickHouse could become a real production system.
ClickHouse was initially built as a prototype to do just a single task well: to filter and aggregate data as fast as possible. That's what needs to be done to build a typical analytical report, and that's what a typical [GROUP BY](../../sql-reference/statements/select/group-by.md) query does. The ClickHouse team has made several high-level decisions that, combined, made achieving this task possible:
Column-oriented storage
: Source data often contains hundreds or even thousands of columns, while a report can use just a few of them. The system needs to avoid reading unnecessary columns, otherwise expensive disk read operations would be wasted on them.
Indexes
: ClickHouse keeps data structures in memory that allow reading only the used columns, and only the necessary row ranges of those columns.
Data compression
: Storing different values of the same column together often leads to better compression ratios (compared to row-oriented systems) because in real data a column often has the same value, or only a few different values, for neighboring rows. In addition to general-purpose compression, ClickHouse supports [specialized codecs](../../sql-reference/statements/create.md#create-query-specialized-codecs) that can make data even more compact.
Vectorized query execution
: ClickHouse not only stores data in columns but also processes data in columns. It leads to better CPU cache utilization and allows for [SIMD](https://en.wikipedia.org/wiki/SIMD) CPU instructions usage.
Scalability
: ClickHouse can leverage all available CPU cores and disks to execute even a single query. Not only on a single server but all CPU cores and disks of a cluster as well.
But many other database management systems use similar techniques. What really makes ClickHouse stand out is **attention to low-level details**. Most programming languages provide implementations for the most common algorithms and data structures, but they tend to be too generic to be effective. Every task can be viewed as a landscape with its own characteristics, so instead of just throwing in a random implementation, it pays to pick one that fits. For example, if you need a hash table, here are some key questions to consider:
- Which hash function to choose?
- Collision resolution algorithm: [open addressing](https://en.wikipedia.org/wiki/Open_addressing) vs [chaining](https://en.wikipedia.org/wiki/Hash_table#Separate_chaining)?
- Memory layout: one array for keys and values or separate arrays? Will it store small or large values?
- Fill factor: when and how to resize? How to move values around on resize?
- Will values be removed and which algorithm will work better if they will?
- Will we need fast probing with bitmaps, inline placement of string keys, support for non-movable values, prefetch, and batching?
The hash table is a key data structure for the `GROUP BY` implementation, and ClickHouse automatically chooses one of [30+ variations](https://github.com/ClickHouse/ClickHouse/blob/master/src/Interpreters/Aggregator.h) for each specific query.
The same goes for algorithms. For example, for sorting you might consider:
- What will be sorted: an array of numbers, tuples, strings, or structures?
- Is all data available completely in RAM?
- Do we need a stable sort?
- Do we need a full sort? Maybe partial sort or n-th element will suffice?
- How to implement comparisons?
- Are we sorting data that has already been partially sorted?
Algorithms that rely on the characteristics of the data they are working with can often do better than their generic counterparts. If the characteristics are not really known in advance, the system can try various implementations and choose the one that works best at runtime. For example, see an [article on how LZ4 decompression is implemented in ClickHouse](https://habr.com/en/company/yandex/blog/457612/).
Last but not least, the ClickHouse team always monitors the Internet for people claiming that they came up with the best implementation, algorithm, or data structure to do something, and tries it out. Those claims mostly turn out to be false, but from time to time you'll indeed find a gem.
!!! info "Tips for building your own high-performance software"
- Keep in mind low-level details when designing your system.
- Design based on hardware capabilities.
- Choose data structures and abstractions based on the needs of the task.
- Provide specializations for special cases.
    - Try new, “best” algorithms that you read about yesterday.
    - Choose an algorithm at runtime based on statistics.
- Benchmark on real datasets.
- Test for performance regressions in CI.
- Measure and observe everything.

View File

@ -1,9 +1,46 @@
--- ---
toc_folder_title: F.A.Q. toc_folder_title: F.A.Q.
toc_priority: 76
toc_title: hidden
toc_hidden: true toc_hidden: true
toc_priority: 76
--- ---
# ClickHouse F.A.Q {#clickhouse-f-a-q}
This section of the documentation is a place to collect answers to ClickHouse-related questions that arise often.
Categories:
- **[General](../faq/general/index.md)**
- [What is ClickHouse?](../index.md#what-is-clickhouse)
- [Why ClickHouse is so fast?](../faq/general/why-clickhouse-is-so-fast.md)
- [Who is using ClickHouse?](../faq/general/who-is-using-clickhouse.md)
- [What does “ClickHouse” mean?](../faq/general/dbms-naming.md)
- [What does “Не тормозит” mean?](../faq/general/ne-tormozit.md)
- [What is OLAP?](../faq/general/olap.md)
- [What is a columnar database?](../faq/general/columnar-database.md)
- [Why not use something like MapReduce?](../faq/general/mapreduce.md)
- **[Use Cases](../faq/use-cases/index.md)**
- [Can I use ClickHouse as a time-series database?](../faq/use-cases/time-series.md)
- [Can I use ClickHouse as a key-value storage?](../faq/use-cases/key-value.md)
- **[Operations](../faq/operations/index.md)**
- [Which ClickHouse version to use in production?](../faq/operations/production.md)
- [Is it possible to delete old records from a ClickHouse table?](../faq/operations/delete-old-data.md)
- **[Integration](../faq/integration/index.md)**
- [How do I export data from ClickHouse to a file?](../faq/integration/file-export.md)
- [What if I have a problem with encodings when connecting to Oracle via ODBC?](../faq/integration/oracle-odbc.md)
{## TODO
Question candidates:
- How to choose a primary key?
- How to add a column in ClickHouse?
- Too many parts
- How to filter ClickHouse table by an array column contents?
- How to insert all rows from one table to another of identical structure?
- How to kill a process (query) in ClickHouse?
- How to implement pivot (like in pandas)?
- How to remove the default ClickHouse user through users.d?
- Importing MySQL dump to Clickhouse
- Window function workarounds (row\_number, lag/lead, running diff/sum/average)
##}
{## [Original article](https://clickhouse.tech/docs/en/faq) ##} {## [Original article](https://clickhouse.tech/docs/en/faq) ##}

View File

@ -0,0 +1,37 @@
---
title: How do I export data from ClickHouse to a file?
toc_hidden: true
toc_priority: 10
---
# How Do I Export Data from ClickHouse to a File? {#how-to-export-to-file}
## Using INTO OUTFILE Clause {#using-into-outfile-clause}
Add an [INTO OUTFILE](../../sql-reference/statements/select/into-outfile.md#into-outfile-clause) clause to your query.
For example:
``` sql
SELECT * FROM table INTO OUTFILE 'file'
```
By default, ClickHouse uses the [TabSeparated](../../interfaces/formats.md#tabseparated) format for output data. To select the [data format](../../interfaces/formats.md), use the [FORMAT clause](../../sql-reference/statements/select/format.md#format-clause).
For example:
``` sql
SELECT * FROM table INTO OUTFILE 'file' FORMAT CSV
```
## Using a File-Engine Table {#using-a-file-engine-table}
See [File](../../engines/table-engines/special/file.md) table engine.
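A minimal sketch of this approach (the table name is hypothetical): rows inserted into such a table are written by the server as a `CSV` file inside the table's data directory.

``` sql
CREATE TABLE export_csv (id UInt32, value String) ENGINE = File(CSV);

-- Fill the table; the data ends up in a CSV file on the server filesystem.
INSERT INTO export_csv SELECT number AS id, toString(number) AS value FROM system.numbers LIMIT 10;
```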
## Using Command-Line Redirection {#using-command-line-redirection}
``` bash
$ clickhouse-client --query "SELECT * from table" --format FormatName > result.txt
```
See [clickhouse-client](../../interfaces/cli.md).

View File

@ -0,0 +1,19 @@
---
title: Questions about integrating ClickHouse and other systems
toc_hidden_folder: true
toc_priority: 4
toc_title: Integration
---
# Questions About Integrating ClickHouse and Other Systems {#question-about-integrating-clickhouse-and-other-systems}
Questions:
- [How do I export data from ClickHouse to a file?](../../faq/integration/file-export.md)
- [How to import JSON into ClickHouse?](../../faq/integration/json-import.md)
- [What if I have a problem with encodings when connecting to Oracle via ODBC?](../../faq/integration/oracle-odbc.md)
!!! info "Don't see what you were looking for?"
Check out [other F.A.Q. categories](../../faq/index.md) or browse around main documentation articles found in the left sidebar.
{## [Original article](https://clickhouse.tech/docs/en/faq/integration/) ##}

View File

@ -0,0 +1,33 @@
---
title: How to import JSON into ClickHouse?
toc_hidden: true
toc_priority: 11
---
# How to Import JSON Into ClickHouse? {#how-to-import-json-into-clickhouse}
ClickHouse supports a wide range of [data formats for input and output](../../interfaces/formats.md). There are multiple JSON variations among them, but the most commonly used for data ingestion is [JSONEachRow](../../interfaces/formats.md#jsoneachrow). It expects one JSON object per row, each object separated by a newline.
## Examples {#examples}
Using [HTTP interface](../../interfaces/http.md):
``` bash
$ echo '{"foo":"bar"}' | curl 'http://localhost:8123/?query=INSERT%20INTO%20test%20FORMAT%20JSONEachRow' --data-binary @-
```
Using [CLI interface](../../interfaces/cli.md):
``` bash
$ echo '{"foo":"bar"}' | clickhouse-client ---query="INSERT INTO test FORMAT 20JSONEachRow"
```
Instead of inserting data manually, you might consider using one of the [client libraries](../../interfaces/index.md).
## Useful Settings {#useful-settings}
- `input_format_skip_unknown_fields` allows inserting JSON even if there are additional fields not present in the table schema (by discarding them).
- `input_format_import_nested_json` allows inserting nested JSON objects into columns of the [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) type.
!!! note "Note"
Settings are specified as `GET` parameters for the HTTP interface or as additional command-line arguments prefixed with `--` for the CLI interface.
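As a rough sketch, the same settings can also be enabled for an interactive `clickhouse-client` session (assuming the same `test` table with a `String` column `foo` as in the examples above):

``` sql
SET input_format_skip_unknown_fields = 1;

-- The extra "unknown" field is discarded instead of causing an error.
INSERT INTO test FORMAT JSONEachRow {"foo":"bar","unknown":1}
```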

View File

@ -0,0 +1,15 @@
---
title: What if I have a problem with encodings when using Oracle via ODBC?
toc_hidden: true
toc_priority: 20
---
# What If I Have a Problem with Encodings When Using Oracle Via ODBC? {#oracle-odbc-encodings}
If you use Oracle as a source of ClickHouse external dictionaries via Oracle ODBC driver, you need to set the correct value for the `NLS_LANG` environment variable in `/etc/default/clickhouse`. For more information, see the [Oracle NLS\_LANG FAQ](https://www.oracle.com/technetwork/products/globalization/nls-lang-099431.html).
**Example**
``` bash
NLS_LANG=RUSSIAN_RUSSIA.UTF8
```

View File

@ -0,0 +1,42 @@
---
title: Is it possible to delete old records from a ClickHouse table?
toc_hidden: true
toc_priority: 20
---
# Is It Possible to Delete Old Records from a ClickHouse Table? {#is-it-possible-to-delete-old-records-from-a-clickhouse-table}
The short answer is “yes”. ClickHouse has multiple mechanisms that allow freeing up disk space by removing old data. Each mechanism is aimed at different scenarios.
## TTL {#ttl}
ClickHouse allows automatically dropping values when some condition is met. This condition is configured as an expression based on any columns, usually just a static offset for any timestamp column.
The key advantage of this approach is that it doesn't need any external system to trigger it: once TTL is configured, data removal happens automatically in the background.
!!! note "Note"
TTL can also be used to move data not only to [/dev/null](https://en.wikipedia.org/wiki/Null_device), but also between different storage systems, like from SSD to HDD.
More details on [configuring TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).
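As an illustration, here is a minimal sketch of a table whose rows expire 30 days after `event_time` (the table and column names are hypothetical):

``` sql
CREATE TABLE events
(
    event_time DateTime,
    event_type String,
    payload String
)
ENGINE = MergeTree
ORDER BY (event_type, event_time)
TTL event_time + INTERVAL 30 DAY;
```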
## ALTER DELETE {#alter-delete}
ClickHouse doesn't have real-time point deletes like in [OLTP](https://en.wikipedia.org/wiki/Online_transaction_processing) databases. The closest things to them are mutations. They are issued as `ALTER ... DELETE` or `ALTER ... UPDATE` queries to distinguish them from normal `DELETE` or `UPDATE`, as they are asynchronous batch operations, not immediate modifications. The rest of the syntax after the `ALTER TABLE` prefix is similar.
`ALTER DELETE` can be issued to flexibly remove old data. If you need to do it regularly, the main downside will be the need for an external system to submit the query. There are also some performance considerations, since mutations rewrite complete parts even if there's only a single row to be deleted.
This is the most common approach to make your system based on ClickHouse [GDPR](https://gdpr-info.eu)-compliant.
More details on [mutations](../../sql-reference/statements/alter.md#alter-mutations).
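For illustration, a sketch of such mutations against the hypothetical `events` table from the TTL example above (the `user_id` column is an assumption):

``` sql
-- Remove everything older than 90 days in one asynchronous batch operation.
ALTER TABLE events DELETE WHERE event_time < now() - INTERVAL 90 DAY;

-- Remove the data of a single user, e.g. for a GDPR erasure request.
ALTER TABLE events DELETE WHERE user_id = 42;
```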
## DROP PARTITION {#drop-partition}
`ALTER TABLE ... DROP PARTITION` provides a cost-efficient way to drop a whole partition. It's not that flexible and needs a proper partitioning scheme configured on table creation, but it still covers most common cases. Like mutations, it needs to be executed from an external system for regular use.
More details on [manipulating partitions](../../sql-reference/statements/alter.md#alter_drop-partition).
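A sketch, assuming the hypothetical `events` table was created with `PARTITION BY toYYYYMM(event_time)`:

``` sql
-- Drops all data for May 2020 by removing the whole partition at once.
ALTER TABLE events DROP PARTITION 202005;
```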
## TRUNCATE {#truncate}
It's rather radical to drop all data from a table, but in some cases it might be exactly what you need.
More details on [table truncation](../../sql-reference/statements/alter.md#alter_drop-partition).
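For the same hypothetical table:

``` sql
-- Removes all data from the table while keeping its schema.
TRUNCATE TABLE events;
```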

View File

@ -0,0 +1,18 @@
---
title: Question about operating ClickHouse servers and clusters
toc_hidden_folder: true
toc_priority: 3
toc_title: Operations
---
# Question About Operating ClickHouse Servers and Clusters {#question-about-operating-clickhouse-servers-and-clusters}
Questions:
- [Which ClickHouse version to use in production?](../../faq/operations/production.md)
- [Is it possible to delete old records from a ClickHouse table?](../../faq/operations/delete-old-data.md)
!!! info "Don't see what you were looking for?"
Check out [other F.A.Q. categories](../../faq/index.md) or browse around main documentation articles found in the left sidebar.
{## [Original article](https://clickhouse.tech/docs/en/faq/production/) ##}

View File

@ -0,0 +1,70 @@
---
title: Which ClickHouse version to use in production?
toc_hidden: true
toc_priority: 10
---
# Which ClickHouse Version to Use in Production? {#which-clickhouse-version-to-use-in-production}
First of all, let's discuss why people ask this question in the first place. There are two key reasons:
1. ClickHouse is developed with pretty high velocity, and usually there are 10+ stable releases per year. That makes for a wide range of releases to choose from, which is not such a trivial choice.
2. Some users want to avoid spending time figuring out which version works best for their use case and just follow someone else's advice.
The second reason is more fundamental, so we'll start with it and then get back to navigating through various ClickHouse releases.
## Which ClickHouse Version Do You Recommend? {#which-clickhouse-version-do-you-recommend}
It's tempting to hire consultants or trust some known experts to get rid of responsibility for your production environment. You install some specific ClickHouse version that someone else recommended; now if there's some issue with it, it's not your fault, it's someone else's. This line of reasoning is a big trap. No external person knows better than you what's going on in your company's production environment.
So how do you properly choose which ClickHouse version to upgrade to? Or how do you choose your first ClickHouse version? First of all, you need to invest in setting up a **realistic pre-production environment**. In an ideal world, it could be a completely identical shadow copy, but that's usually expensive.
Here are some key points for getting reasonable fidelity in a pre-production environment at not-so-high costs:
- The pre-production environment needs to run a set of queries as close as possible to what you intend to run in production:
- Don't make it read-only with some frozen data.
- Don't make it write-only with just copying data without building some typical reports.
- Don't wipe it clean instead of applying schema migrations.
- Use a sample of real production data and queries. Try to choose a sample that's still representative and makes `SELECT` queries return reasonable results. Use obfuscation if your data is sensitive and internal policies don't allow it to leave the production environment.
- Make sure that pre-production is covered by your monitoring and alerting software the same way as your production environment is.
- If your production spans across multiple datacenters or regions, make sure your pre-production does the same.
- If your production uses complex features like replication, distributed tables, or cascading materialized views, make sure they are configured similarly in pre-production.
- There's a trade-off between using roughly the same number of servers or VMs in pre-production as in production but of a smaller size, or using much fewer of them but of the same size. The first option might catch extra network-related issues, while the latter is easier to manage.
The second area to invest in is **automated testing infrastructure**. Don't assume that if some kind of query has executed successfully once, it'll continue to do so forever. It's ok to have some unit tests where ClickHouse is mocked, but make sure your product has a reasonable set of automated tests that are run against real ClickHouse and check that all important use cases are still working as expected.
An extra step forward could be contributing those automated tests to [ClickHouse's open-source test infrastructure](https://github.com/ClickHouse/ClickHouse/tree/master/tests) that's continuously used in its day-to-day development. It definitely will take some additional time and effort to learn [how to run it](../../development/tests.md) and then how to adapt your tests to this framework, but it'll pay off by ensuring that ClickHouse releases are already tested against them when they are announced stable, instead of repeatedly losing time on reporting the issue after the fact and then waiting for a bugfix to be implemented, backported and released. Some companies even have contributing such tests to shared infrastructure as an internal policy; most notably, it's called the [Beyoncé Rule](https://www.oreilly.com/library/view/software-engineering-at/9781492082781/ch01.html#policies_that_scale_well) at Google.
When you have your pre-production environment and testing infrastructure in place, choosing the best version is straightforward:
1. Routinely run your automated tests against new ClickHouse releases. You can do it even for ClickHouse releases that are marked as `testing`, but going forward to the next steps with them is not recommended.
2. Deploy the ClickHouse release that passed the tests to pre-production and check that all processes are running as expected.
3. Report any issues you discovered to [ClickHouse GitHub Issues](https://github.com/ClickHouse/ClickHouse/issues).
4. If there were no major issues, it should be safe to start deploying ClickHouse release to your production environment. Investing in gradual release automation that implements an approach similar to [canary releases](https://martinfowler.com/bliki/CanaryRelease.html) or [green-blue deployments](https://martinfowler.com/bliki/BlueGreenDeployment.html) might further reduce the risk of issues in production.
As you might have noticed, there's nothing specific to ClickHouse in the approach described above; people do that for any piece of infrastructure they rely on if they take their production environment seriously.
## How to Choose Between ClickHouse Releases? {#how-to-choose-between-clickhouse-releases}
If you look into the contents of the ClickHouse package repository, you'll see four kinds of packages:
1. `testing`
2. `prestable`
3. `stable`
4. `lts` (long-term support)
As was mentioned earlier, `testing` is good mostly for noticing issues early; running these releases in production is not recommended because each of them is not tested as thoroughly as the other kinds of packages.
`prestable` is a release candidate which generally looks promising and is likely to be announced as `stable` soon. You can try them out in pre-production and report issues if you see any.
For production use, there are two key options: `stable` and `lts`. Here is some guidance on how to choose between them:
- `stable` is the kind of package we recommend by default. They are released roughly monthly (and thus provide new features with a reasonable delay), and the three latest stable releases are supported in terms of diagnostics and backporting of bugfixes.
- `lts` are released twice a year and are supported for a year after their initial release. You might prefer them over `stable` in the following cases:
    - Your company has some internal policies that don't allow for frequent upgrades or using non-LTS software.
    - You are using ClickHouse in some secondary products that either don't require any complex ClickHouse features or don't have enough resources to keep it updated.
Many teams who initially thought that `lts` is the way to go often switch to `stable` anyway because of some recent feature that's important for their product.
!!! warning "Important"
One more thing to keep in mind when upgrading ClickHouse: we're always keeping an eye on compatibility across releases, but sometimes it's not reasonable to keep compatibility and some minor details might change. So make sure you check the [changelog](../../whats-new/changelog/index.md) before upgrading to see if there are any notes about backward-incompatible changes.

View File

@ -0,0 +1,18 @@
---
title: Questions about ClickHouse use cases
toc_hidden_folder: true
toc_priority: 2
toc_title: Use Cases
---
# Questions About ClickHouse Use Cases {#questions-about-clickhouse-use-cases}
Questions:
- [Can I use ClickHouse as a time-series database?](../../faq/use-cases/time-series.md)
- [Can I use ClickHouse as a key-value storage?](../../faq/use-cases/key-value.md)
!!! info "Don't see what you were looking for?"
Check out [other F.A.Q. categories](../../faq/index.md) or browse around main documentation articles found in the left sidebar.
{## [Original article](https://clickhouse.tech/docs/en/faq/use-cases/) ##}

View File

@ -0,0 +1,17 @@
---
title: Can I use ClickHouse as a key-value storage?
toc_hidden: true
toc_priority: 101
---
# Can I Use ClickHouse As a Key-Value Storage? {#can-i-use-clickhouse-as-a-key-value-storage}
The short answer is **“no”**. The key-value workload is among the top positions in the list of cases when **NOT**{.text-danger} to use ClickHouse. It's an [OLAP](../../faq/general/olap.md) system after all, while there are many excellent key-value storage systems out there.
However, there might be situations where it still makes sense to use ClickHouse for key-value-like queries. Usually, it's some low-budget product where the main workload is analytical in nature and fits ClickHouse well, but there's also a secondary process that needs a key-value pattern with not-so-high request throughput and without strict latency requirements. If you had an unlimited budget, you would have installed a secondary key-value database for this secondary workload, but in reality, there's an additional cost of maintaining one more storage system (monitoring, backups, etc.) which might be desirable to avoid.
If you decide to go against the recommendations and run some key-value-like queries against ClickHouse, here are some tips (see the sketch after this list):
- The key reason why point queries are expensive in ClickHouse is its sparse primary index of the main [MergeTree table engine family](../../engines/table-engines/mergetree-family/mergetree.md). This index can't point to each specific row of data; instead, it points to every N-th row, and the system has to scan from the neighboring N-th row to the desired one, reading excessive data along the way. In a key-value scenario, it might be useful to reduce the value of N with the `index_granularity` setting.
- ClickHouse keeps each column in a separate set of files, so to assemble one complete row it needs to go through each of those files. Their count increases linearly with the number of columns, so in the key-value scenario, it might be worth avoiding many columns and putting all your payload in a single `String` column encoded in some serialization format like JSON, Protobuf, or whatever makes sense.
- There's an alternative approach that uses the [Join](../../engines/table-engines/special/join.md) table engine instead of normal `MergeTree` tables and the [joinGet](../../sql-reference/functions/other-functions.md#joinget) function to retrieve the data. It can provide better query performance but might have some usability and reliability issues. Here's a [usage example](https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/00800_versatile_storage_join.sql#L49-L51).
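To make these tips concrete, here is a rough sketch with hypothetical table and key names (not an official recommendation):

``` sql
-- MergeTree with a denser primary index and a single serialized payload column.
CREATE TABLE kv
(
    key String,
    value String  -- e.g. a JSON- or Protobuf-encoded payload
)
ENGINE = MergeTree
ORDER BY key
SETTINGS index_granularity = 256;

-- Alternative: in-memory Join engine plus joinGet() for point lookups.
CREATE TABLE kv_join
(
    key String,
    value String
)
ENGINE = Join(ANY, LEFT, key);

SELECT joinGet('kv_join', 'value', 'some-key');
```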

View File

@ -0,0 +1,15 @@
---
title: Can I use ClickHouse as a time-series database?
toc_hidden: true
toc_priority: 101
---
# Can I Use ClickHouse As a Time-Series Database? {#can-i-use-clickhouse-as-a-time-series-database}
ClickHouse is a generic data storage solution for [OLAP](../../faq/general/olap.md) workloads, while there are many specialized time-series database management systems. Nevertheless, ClickHouse's [focus on query execution speed](../../faq/general/why-clickhouse-is-so-fast.md) allows it to outperform specialized systems in many cases. There are many independent benchmarks on this topic out there ([example](https://medium.com/@AltinityDB/clickhouse-for-time-series-scalability-benchmarks-e181132a895b)), so we're not going to conduct one here. Instead, let's focus on ClickHouse features that are important to use if that's your use case.
First of all, there are **[specialized codecs](../../sql-reference/statements/create.md#create-query-specialized-codecs)** which make typical time-series data much more compact: either common algorithms like `DoubleDelta` and `Gorilla`, or ones specific to ClickHouse like `T64`.
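For example, a sketch of a table that applies such codecs to a hypothetical metrics stream:

``` sql
CREATE TABLE cpu_metrics
(
    ts DateTime CODEC(DoubleDelta, LZ4),
    metric LowCardinality(String),
    value Float64 CODEC(Gorilla, LZ4)
)
ENGINE = MergeTree
ORDER BY (metric, ts);
```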
Second, time-series queries often hit only recent data, like one day or one week old. It makes sense to use servers that have both fast NVMe/SSD drives and high-capacity HDD drives. The ClickHouse [TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) feature allows configuring the system to keep fresh hot data on fast drives and gradually move it to slower drives as it ages. Rollup or removal of even older data is also possible if your requirements demand it.
Even though it's against the ClickHouse philosophy of storing and processing raw data, you can use [materialized views](../../sql-reference/statements/create.md#create-view) to fit into even tighter latency or cost requirements.
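As a sketch, a materialized view that pre-aggregates the hypothetical `cpu_metrics` table from the previous example into hourly rollups:

``` sql
CREATE MATERIALIZED VIEW cpu_metrics_per_hour
ENGINE = SummingMergeTree
ORDER BY (metric, hour)
AS SELECT
    metric,
    toStartOfHour(ts) AS hour,
    count() AS samples,
    sum(value) AS value_sum
FROM cpu_metrics
GROUP BY metric, hour;
```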

View File

@ -9,12 +9,12 @@ toc_title: Introduction
This section describes how to obtain example datasets and import them into ClickHouse. This section describes how to obtain example datasets and import them into ClickHouse.
For some datasets example queries are also available. For some datasets example queries are also available.
- [Anonymized Yandex.Metrica Dataset](metrica.md) - [Anonymized Yandex.Metrica Dataset](../../getting-started/example-datasets/metrica.md)
- [Star Schema Benchmark](star-schema.md) - [Star Schema Benchmark](../../getting-started/example-datasets/star-schema.md)
- [WikiStat](wikistat.md) - [WikiStat](../../getting-started/example-datasets/wikistat.md)
- [Terabyte of Click Logs from Criteo](criteo.md) - [Terabyte of Click Logs from Criteo](../../getting-started/example-datasets/criteo.md)
- [AMPLab Big Data Benchmark](amplab-benchmark.md) - [AMPLab Big Data Benchmark](../../getting-started/example-datasets/amplab-benchmark.md)
- [New York Taxi Data](nyc-taxi.md) - [New York Taxi Data](../../getting-started/example-datasets/nyc-taxi.md)
- [OnTime](ontime.md) - [OnTime](../../getting-started/example-datasets/ontime.md)
[Original article](https://clickhouse.tech/docs/en/getting_started/example_datasets) <!--hide--> [Original article](https://clickhouse.tech/docs/en/getting_started/example_datasets) <!--hide-->

View File

@ -7,7 +7,7 @@ toc_title: Yandex.Metrica Data
Dataset consists of two tables containing anonymized data about hits (`hits_v1`) and visits (`visits_v1`) of Yandex.Metrica. You can read more about Yandex.Metrica in [ClickHouse history](../../introduction/history.md) section. Dataset consists of two tables containing anonymized data about hits (`hits_v1`) and visits (`visits_v1`) of Yandex.Metrica. You can read more about Yandex.Metrica in [ClickHouse history](../../introduction/history.md) section.
The dataset consists of two tables, either of them can be downloaded as a compressed `tsv.xz` file or as prepared partitions. In addition to that, an extended version of the `hits` table containing 100 million rows is available as TSV at https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits_100m_obfuscated_v1.tsv.xz and as prepared partitions at https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_100m_obfuscated_v1.tar.xz. The dataset consists of two tables, either of them can be downloaded as a compressed `tsv.xz` file or as prepared partitions. In addition to that, an extended version of the `hits` table containing 100 million rows is available as TSV at https://clickhouse-datasets.s3.yandex.net/hits/tsv/hits\_100m\_obfuscated\_v1.tsv.xz and as prepared partitions at https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits\_100m\_obfuscated\_v1.tar.xz.
## Obtaining Tables from Prepared Partitions {#obtaining-tables-from-prepared-partitions} ## Obtaining Tables from Prepared Partitions {#obtaining-tables-from-prepared-partitions}

View File

@ -7,9 +7,9 @@ toc_title: hidden
# Getting Started {#getting-started} # Getting Started {#getting-started}
If you are new to ClickHouse and want to get a hands-on feeling of its performance, first of all, you need to go through the [installation process](install.md). After that you can: If you are new to ClickHouse and want to get a hands-on feeling of its performance, first of all, you need to go through the [installation process](../getting-started/install.md). After that you can:
- [Go through detailed tutorial](tutorial.md) - [Go through detailed tutorial](../getting-started/tutorial.md)
- [Experiment with example datasets](example-datasets/ontime.md) - [Experiment with example datasets](../getting-started/example-datasets/ontime.md)
[Original article](https://clickhouse.tech/docs/en/getting_started/) <!--hide--> [Original article](https://clickhouse.tech/docs/en/getting_started/) <!--hide-->

View File

@ -64,7 +64,7 @@ You can also download and install packages manually from [here](https://repo.cli
It is recommended to use official pre-compiled `tgz` archives for all Linux distributions, where installation of `deb` or `rpm` packages is not possible. It is recommended to use official pre-compiled `tgz` archives for all Linux distributions, where installation of `deb` or `rpm` packages is not possible.
The required version can be downloaded with `curl` or `wget` from repository https://repo.yandex.ru/clickhouse/tgz/. The required version can be downloaded with `curl` or `wget` from repository https://repo.clickhouse.tech/tgz/.
After that downloaded archives should be unpacked and installed with installation scripts. Example for the latest version: After that downloaded archives should be unpacked and installed with installation scripts. Example for the latest version:
``` bash ``` bash
@ -94,6 +94,18 @@ For production environments, its recommended to use the latest `stable`-versi
To run ClickHouse inside Docker follow the guide on [Docker Hub](https://hub.docker.com/r/yandex/clickhouse-server/). Those images use official `deb` packages inside. To run ClickHouse inside Docker follow the guide on [Docker Hub](https://hub.docker.com/r/yandex/clickhouse-server/). Those images use official `deb` packages inside.
### From Precompiled Binaries for Non-Standard Environments {#from-binaries-non-linux}
For non-Linux operating systems and for the AArch64 CPU architecture, ClickHouse builds are provided as a cross-compiled binary from the latest commit of the `master` branch (with a few hours' delay).
- [macOS](https://builds.clickhouse.tech/master/macos/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/macos/clickhouse' && chmod a+x ./clickhouse`
- [FreeBSD](https://builds.clickhouse.tech/master/freebsd/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/freebsd/clickhouse' && chmod a+x ./clickhouse`
- [AArch64](https://builds.clickhouse.tech/master/aarch64/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/aarch64/clickhouse' && chmod a+x ./clickhouse`
After downloading, you can use the `clickhouse client` to connect to the server, or `clickhouse local` to process local data. To run `clickhouse server`, you have to additionally download [server](https://github.com/ClickHouse/ClickHouse/blob/master/programs/server/config.xml) and [users](https://github.com/ClickHouse/ClickHouse/blob/master/programs/server/users.xml) configuration files from GitHub.
These builds are not recommended for use in production environments because they are less thoroughly tested, but you can do so at your own risk. They also have only a subset of ClickHouse features available.
### From Sources {#from-sources} ### From Sources {#from-sources}
To manually compile ClickHouse, follow the instructions for [Linux](../development/build.md) or [Mac OS X](../development/build-osx.md). To manually compile ClickHouse, follow the instructions for [Linux](../development/build.md) or [Mac OS X](../development/build-osx.md).

View File

@ -6,13 +6,13 @@ toc_title: Playground
# ClickHouse Playground {#clickhouse-playground} # ClickHouse Playground {#clickhouse-playground}
[ClickHouse Playground](https://play.clickhouse.tech) allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster. [ClickHouse Playground](https://play.clickhouse.tech) allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster.
Several example datasets are available in the Playground as well as sample queries that show ClickHouse features. There's also a selection of ClickHouse LTS releases to experiment with. Several example datasets are available in the Playground as well as sample queries that show ClickHouse features. Theres also a selection of ClickHouse LTS releases to experiment with.
ClickHouse Playground gives the experience of m2.small [Managed Service for ClickHouse](https://cloud.yandex.com/services/managed-clickhouse) instance (4 vCPU, 32 GB RAM) hosted in [Yandex.Cloud](https://cloud.yandex.com/). More information about [cloud providers](../commercial/cloud.md). ClickHouse Playground gives the experience of m2.small [Managed Service for ClickHouse](https://cloud.yandex.com/services/managed-clickhouse) instance (4 vCPU, 32 GB RAM) hosted in [Yandex.Cloud](https://cloud.yandex.com/). More information about [cloud providers](../commercial/cloud.md).
You can make queries to playground using any HTTP client, for example [curl](https://curl.haxx.se) or [wget](https://www.gnu.org/software/wget/), or set up a connection using [JDBC](../interfaces/jdbc.md) or [ODBC](../interfaces/odbc.md) drivers. More information about software products that support ClickHouse is available [here](../interfaces/index.md). You can make queries to playground using any HTTP client, for example [curl](https://curl.haxx.se) or [wget](https://www.gnu.org/software/wget/), or set up a connection using [JDBC](../interfaces/jdbc.md) or [ODBC](../interfaces/odbc.md) drivers. More information about software products that support ClickHouse is available [here](../interfaces/index.md).
## Credentials ## Credentials {#credentials}
| Parameter | Value | | Parameter | Value |
|:--------------------|:----------------------------------------| |:--------------------|:----------------------------------------|
@ -23,13 +23,13 @@ You can make queries to playground using any HTTP client, for example [curl](htt
There are additional endpoints with specific ClickHouse releases to experiment with their differences (ports and user/password are the same as above): There are additional endpoints with specific ClickHouse releases to experiment with their differences (ports and user/password are the same as above):
* 20.3 LTS: `play-api-v20-3.clickhouse.tech` - 20.3 LTS: `play-api-v20-3.clickhouse.tech`
* 19.14 LTS: `play-api-v19-14.clickhouse.tech` - 19.14 LTS: `play-api-v19-14.clickhouse.tech`
!!! note "Note" !!! note "Note"
All these endpoints require a secure TLS connection. All these endpoints require a secure TLS connection.
## Limitations ## Limitations {#limitations}
The queries are executed as a read-only user. It implies some limitations: The queries are executed as a read-only user. It implies some limitations:
@ -37,12 +37,12 @@ The queries are executed as a read-only user. It implies some limitations:
- INSERT queries are not allowed - INSERT queries are not allowed
The following settings are also enforced: The following settings are also enforced:
- [max_result_bytes=10485760](../operations/settings/query_complexity/#max-result-bytes) - [max\_result\_bytes=10485760](../operations/settings/query_complexity/#max-result-bytes)
- [max_result_rows=2000](../operations/settings/query_complexity/#setting-max_result_rows) - [max\_result\_rows=2000](../operations/settings/query_complexity/#setting-max_result_rows)
- [result_overflow_mode=break](../operations/settings/query_complexity/#result-overflow-mode) - [result\_overflow\_mode=break](../operations/settings/query_complexity/#result-overflow-mode)
- [max_execution_time=60000](../operations/settings/query_complexity/#max-execution-time) - [max\_execution\_time=60000](../operations/settings/query_complexity/#max-execution-time)
## Examples ## Examples {#examples}
HTTPS endpoint example with `curl`: HTTPS endpoint example with `curl`:
@ -51,11 +51,12 @@ curl "https://play-api.clickhouse.tech:8443/?query=SELECT+'Play+ClickHouse!';&us
``` ```
TCP endpoint example with [CLI](../interfaces/cli.md): TCP endpoint example with [CLI](../interfaces/cli.md):
``` bash ``` bash
clickhouse client --secure -h play-api.clickhouse.tech --port 9440 -u playground --password clickhouse -q "SELECT 'Play ClickHouse!'" clickhouse client --secure -h play-api.clickhouse.tech --port 9440 -u playground --password clickhouse -q "SELECT 'Play ClickHouse!'"
``` ```
## Implementation Details ## Implementation Details {#implementation-details}
ClickHouse Playground web interface makes requests via ClickHouse [HTTP API](../interfaces/http.md). ClickHouse Playground web interface makes requests via ClickHouse [HTTP API](../interfaces/http.md).
The Playground backend is just a ClickHouse cluster without any additional server-side application. As mentioned above, ClickHouse HTTPS and TCP/TLS endpoints are also publicly available as a part of the Playground, both are proxied through [Cloudflare Spectrum](https://www.cloudflare.com/products/cloudflare-spectrum/) to add extra layer of protection and improved global connectivity. The Playground backend is just a ClickHouse cluster without any additional server-side application. As mentioned above, ClickHouse HTTPS and TCP/TLS endpoints are also publicly available as a part of the Playground, both are proxied through [Cloudflare Spectrum](https://www.cloudflare.com/products/cloudflare-spectrum/) to add extra layer of protection and improved global connectivity.

View File

@ -11,7 +11,7 @@ By going through this tutorial, youll learn how to set up a simple ClickHouse
## Single Node Setup {#single-node-setup} ## Single Node Setup {#single-node-setup}
To postpone the complexities of a distributed environment, well start with deploying ClickHouse on a single server or virtual machine. ClickHouse is usually installed from [deb](install.md#install-from-deb-packages) or [rpm](install.md#from-rpm-packages) packages, but there are [alternatives](install.md#from-docker-image) for the operating systems that do no support them. To postpone the complexities of a distributed environment, well start with deploying ClickHouse on a single server or virtual machine. ClickHouse is usually installed from [deb](../getting-started/install.md#install-from-deb-packages) or [rpm](../getting-started/install.md#from-rpm-packages) packages, but there are [alternatives](../getting-started/install.md#from-docker-image) for the operating systems that do no support them.
For example, you have chosen `deb` packages and executed: For example, you have chosen `deb` packages and executed:
@ -80,7 +80,7 @@ clickhouse-client --query='INSERT INTO table FORMAT TabSeparated' < data.tsv
## Import Sample Dataset {#import-sample-dataset} ## Import Sample Dataset {#import-sample-dataset}
Now its time to fill our ClickHouse server with some sample data. In this tutorial, well use the anonymized data of Yandex.Metrica, the first service that runs ClickHouse in production way before it became open-source (more on that in [history section](../introduction/history.md)). There are [multiple ways to import Yandex.Metrica dataset](example-datasets/metrica.md), and for the sake of the tutorial, well go with the most realistic one. Now its time to fill our ClickHouse server with some sample data. In this tutorial, well use the anonymized data of Yandex.Metrica, the first service that runs ClickHouse in production way before it became open-source (more on that in [history section](../introduction/history.md)). There are [multiple ways to import Yandex.Metrica dataset](../getting-started/example-datasets/metrica.md), and for the sake of the tutorial, well go with the most realistic one.
### Download and Extract Table Data {#download-and-extract-table-data} ### Download and Extract Table Data {#download-and-extract-table-data}

View File

@ -232,6 +232,6 @@ FROM
``` ```
!!! note "Note" !!! note "Note"
More info about [avg()](../sql-reference/aggregate-functions/reference.md#agg_function-avg) and [log()](../sql-reference/functions/math-functions.md) functions. More info about [avg()](../sql-reference/aggregate-functions/reference/avg.md#agg_function-avg) and [log()](../sql-reference/functions/math-functions.md) functions.
[Original article](https://clickhouse.tech/docs/en/guides/apply_catboost_model/) <!--hide--> [Original article](https://clickhouse.tech/docs/en/guides/apply_catboost_model/) <!--hide-->

View File

@ -9,6 +9,6 @@ toc_title: Overview
List of detailed step-by-step instructions that help to solve various tasks using ClickHouse: List of detailed step-by-step instructions that help to solve various tasks using ClickHouse:
- [Tutorial on simple cluster set-up](../getting-started/tutorial.md) - [Tutorial on simple cluster set-up](../getting-started/tutorial.md)
- [Applying a CatBoost model in ClickHouse](apply-catboost-model.md) - [Applying a CatBoost model in ClickHouse](../guides/apply-catboost-model.md)
[Original article](https://clickhouse.tech/docs/en/guides/) <!--hide--> [Original article](https://clickhouse.tech/docs/en/guides/) <!--hide-->

View File

@ -1147,11 +1147,11 @@ To exchange data with Hadoop, you can use [HDFS table engine](../engines/table-e
[Apache Arrow](https://arrow.apache.org/) comes with two built-in columnar storage formats. ClickHouse supports read and write operations for these formats. [Apache Arrow](https://arrow.apache.org/) comes with two built-in columnar storage formats. ClickHouse supports read and write operations for these formats.
`Arrow` is Apache Arrow's "file mode" format. It is designed for in-memory random access. `Arrow` is Apache Arrows “file mode” format. It is designed for in-memory random access.
## ArrowStream {#data-format-arrow-stream} ## ArrowStream {#data-format-arrow-stream}
`ArrowStream` is Apache Arrow's "stream mode" format. It is designed for in-memory stream processing. `ArrowStream` is Apache Arrows “stream mode” format. It is designed for in-memory stream processing.
## ORC {#data-format-orc} ## ORC {#data-format-orc}

View File

@ -275,7 +275,7 @@ Use buffering to avoid situations where a query processing error occurred after
### Queries with Parameters {#cli-queries-with-parameters} ### Queries with Parameters {#cli-queries-with-parameters}
You can create a query with parameters and pass values for them from the corresponding HTTP request parameters. For more information, see [Queries with Parameters for CLI](cli.md#cli-queries-with-parameters). You can create a query with parameters and pass values for them from the corresponding HTTP request parameters. For more information, see [Queries with Parameters for CLI](../interfaces/cli.md#cli-queries-with-parameters).
### Example {#example} ### Example {#example}
@ -291,7 +291,7 @@ ClickHouse supports specific queries through the HTTP interface. For example, yo
$ echo '(4),(5),(6)' | curl 'http://localhost:8123/?query=INSERT%20INTO%20t%20VALUES' --data-binary @- $ echo '(4),(5),(6)' | curl 'http://localhost:8123/?query=INSERT%20INTO%20t%20VALUES' --data-binary @-
``` ```
ClickHouse also supports Predefined HTTP Interface which can help you more easy integration with third party tools like [Prometheus exporter](https://github.com/percona-lab/clickhouse_exporter). ClickHouse also supports Predefined HTTP Interface which can help you more easily integrate with third-party tools like [Prometheus exporter](https://github.com/percona-lab/clickhouse_exporter).
Example: Example:
@ -314,7 +314,7 @@ Example:
</http_handlers> </http_handlers>
``` ```
- You can now request the url directly for data in the Prometheus format: - You can now request the URL directly for data in the Prometheus format:
<!-- --> <!-- -->
@ -361,41 +361,40 @@ $ curl -v 'http://localhost:8123/predefined_query'
* Connection #0 to host localhost left intact * Connection #0 to host localhost left intact
* Connection #0 to host localhost left intact * Connection #0 to host localhost left intact
``` ```
As you can see from the example, if `<http_handlers>` is configured in the config.xml file and `<http_handlers>` can contain many `<rule>s`. ClickHouse will match the HTTP requests received to the predefined type in `<rule>` and the first matched runs the handler. Then ClickHouse will execute the corresponding predefined query if the match is successful. As you can see from the example if `http_handlers` is configured in the config.xml file and `http_handlers` can contain many `rules`. ClickHouse will match the HTTP requests received to the predefined type in `rule` and the first matched runs the handler. Then ClickHouse will execute the corresponding predefined query if the match is successful.
> Now `<rule>` can configure `<method>`, `<headers>`, `<url>`,`<handler>`: Now `rule` can configure `method`, `headers`, `url`, `handler`:
> `<method>` is responsible for matching the method part of the HTTP request. `<method>` fully conforms to the definition of [method](https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods) in the HTTP protocol. It is an optional configuration. If it is not defined in the configuration file, it does not match the method portion of the HTTP request. - `method` is responsible for matching the method part of the HTTP request. `method` fully conforms to the definition of [method](https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods) in the HTTP protocol. It is an optional configuration. If it is not defined in the configuration file, it does not match the method portion of the HTTP request.
>
> `<url>` is responsible for matching the url part of the HTTP request. It is compatible with [RE2](https://github.com/google/re2)s regular expressions. It is an optional configuration. If it is not defined in the configuration file, it does not match the url portion of the HTTP request.
>
> `<headers>` is responsible for matching the header part of the HTTP request. It is compatible with RE2s regular expressions. It is an optional configuration. If it is not defined in the configuration file, it does not match the header portion of the HTTP request.
>
> `<handler>` contains the main processing part. Now `<handler>` can configure `<type>`, `<status>`, `<content_type>`, `<response_content>`, `<query>`, `<query_param_name>`.
> \> `<type>` currently supports three types: **predefined\_query\_handler**, **dynamic\_query\_handler**, **static**.
> \>
> \> `<query>` - use with predefined\_query\_handler type, executes query when the handler is called.
> \>
> \> `<query_param_name>` - use with dynamic\_query\_handler type, extracts and executes the value corresponding to the `<query_param_name>` value in HTTP request params.
> \>
> \> `<status>` - use with static type, response status code.
> \>
> \> `<content_type>` - use with static type, response [content-type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type).
> \>
> \> `<response_content>` - use with static type, Response content sent to client, when using the prefix file:// or config://, find the content from the file or configuration send to client.
Next are the configuration methods for the different `<type>`. - `url` is responsible for matching the URL part of the HTTP request. It is compatible with [RE2](https://github.com/google/re2)s regular expressions. It is an optional configuration. If it is not defined in the configuration file, it does not match the URL portion of the HTTP request.
## predefined\_query\_handler {#predefined_query_handler} - `headers` are responsible for matching the header part of the HTTP request. It is compatible with RE2s regular expressions. It is an optional configuration. If it is not defined in the configuration file, it does not match the header portion of the HTTP request.
`<predefined_query_handler>` supports setting Settings and query\_params values. You can configure `<query>` in the type of `<predefined_query_handler>`. - `handler` contains the main processing part. Now `handler` can configure `type`, `status`, `content_type`, `response_content`, `query`, `query_param_name`.
`type` currently supports three types: [predefined_query_handler](#predefined_query_handler), [dynamic_query_handler](#dynamic_query_handler), [static](#static).
`<query>` value is a predefined query of `<predefined_query_handler>`, which is executed by ClickHouse when an HTTP request is matched and the result of the query is returned. It is a must configuration. - `query` — use with `predefined_query_handler` type, executes query when the handler is called.
The following example defines the values of `max_threads` and `max_alter_threads` settings, then queries the system table to check whether these settings were set successfully. - `query_param_name` — use with `dynamic_query_handler` type, extracts and executes the value corresponding to the `query_param_name` value in HTTP request params.
- `status` — use with `static` type, response status code.
- `content_type` — use with `static` type, response [content-type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type).
- `response_content` — use with `static` type, response content sent to client, when using the prefix file:// or config://, find the content from the file or configuration sends to client.
Next are the configuration methods for different `type`.
### predefined_query_handler {#predefined_query_handler}
`predefined_query_handler` supports setting `Settings` and `query_params` values. You can configure `query` in the `predefined_query_handler` type.
The `query` value is a predefined query of `predefined_query_handler`, which ClickHouse executes when an HTTP request is matched, returning the result of the query. It is a required configuration.
The following example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_alter_threads` settings, then queries the system table to check whether these settings were set successfully.
Example: Example:
@ -424,15 +423,15 @@ max_alter_threads 2
``` ```
!!! note "caution" !!! note "caution"
In one `<predefined_query_handler>` only supports one `<query>` of an insert type. One `predefined_query_handler` supports only one `query` of an insert type.
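As a rough usage sketch (assuming a hypothetical rule whose `url` matches `/settings_check` and whose `query` reads the two settings back from `system.settings`), such a handler could be called with the setting values passed as ordinary request parameters:

```
# Hypothetical handler URL; max_threads and max_alter_threads are sent as plain request parameters.
curl 'http://localhost:8123/settings_check?max_threads=1&max_alter_threads=2'
```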
## dynamic\_query\_handler {#dynamic_query_handler} ### dynamic_query_handler {#dynamic_query_handler}
In `<dynamic_query_handler>`, query is written in the form of param of the HTTP request. The difference is that in `<predefined_query_handler>`, query is wrote in the configuration file. You can configure `<query_param_name>` in `<dynamic_query_handler>`. In `dynamic_query_handler`, the query is written as a parameter of the HTTP request. The difference is that in `predefined_query_handler`, the query is written in the configuration file. You can configure `query_param_name` in `dynamic_query_handler`.
ClickHouse extracts and executes the value corresponding to the `<query_param_name>` value in the url of the HTTP request. The default value of `<query_param_name>` is `/query` . It is an optional configuration. If there is no definition in the configuration file, the param is not passed in. ClickHouse extracts and executes the value corresponding to the `query_param_name` value in the URL of the HTTP request. The default value of `query_param_name` is `/query` . It is an optional configuration. If there is no definition in the configuration file, the param is not passed in.
To experiment with this functionality, the example defines the values of max\_threads and max\_alter\_threads and queries whether the Settings were set successfully. To experiment with this functionality, the example defines the values of [max_threads](../operations/settings/settings.md#settings-max_threads) and `max_alter_threads` and queries whether the settings were set successfully.
Example: Example:
@ -455,9 +454,9 @@ max_threads 1
max_alter_threads 2 max_alter_threads 2
``` ```
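For instance, a minimal sketch assuming a hypothetical rule with `<query_param_name>dyn_query</query_param_name>`; the query text itself travels as that request parameter:

```
# Hypothetical query_param_name "dyn_query"; the query text is URL-encoded by curl -G.
curl -G 'http://localhost:8123/' \
    --data-urlencode "dyn_query=SELECT name, value FROM system.settings WHERE name = 'max_threads'"
```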
## static {#static} ### static {#static}
`<static>` can return [content\_type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type), [status](https://developer.mozilla.org/en-US/docs/Web/HTTP/Status) and response\_content. response\_content can return the specified content `static` can return [content_type](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type), [status](https://developer.mozilla.org/en-US/docs/Web/HTTP/Status) and `response_content`. `response_content` can return the specified content.
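For instance, a hedged sketch of calling such a handler, assuming a hypothetical `static` rule that serves a short plain-text message at `/hi` with a custom `status` code; the `-i` flag prints the status line and headers so the configured values are visible:

```
# Hypothetical static rule at /hi; -i shows the configured status code and content-type.
curl -i 'http://localhost:8123/hi'
```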
Example: Example:

View File

@ -9,19 +9,19 @@ toc_title: Introduction
ClickHouse provides two network interfaces (both can be optionally wrapped in TLS for additional security): ClickHouse provides two network interfaces (both can be optionally wrapped in TLS for additional security):
- [HTTP](http.md), which is documented and easy to use directly. - [HTTP](http.md), which is documented and easy to use directly.
- [Native TCP](tcp.md), which has less overhead. - [Native TCP](../interfaces/tcp.md), which has less overhead.
In most cases it is recommended to use an appropriate tool or library instead of interacting with those directly. Officially supported by Yandex are the following: In most cases it is recommended to use an appropriate tool or library instead of interacting with those directly. Officially supported by Yandex are the following:
- [Command-line client](cli.md) - [Command-line client](../interfaces/cli.md)
- [JDBC driver](jdbc.md) - [JDBC driver](../interfaces/jdbc.md)
- [ODBC driver](odbc.md) - [ODBC driver](../interfaces/odbc.md)
- [C++ client library](cpp.md) - [C++ client library](../interfaces/cpp.md)
There is also a wide range of third-party libraries for working with ClickHouse: There is also a wide range of third-party libraries for working with ClickHouse:
- [Client libraries](third-party/client-libraries.md) - [Client libraries](../interfaces/third-party/client-libraries.md)
- [Integrations](third-party/integrations.md) - [Integrations](../interfaces/third-party/integrations.md)
- [Visual interfaces](third-party/gui.md) - [Visual interfaces](../interfaces/third-party/gui.md)
[Original article](https://clickhouse.tech/docs/en/interfaces/) <!--hide--> [Original article](https://clickhouse.tech/docs/en/interfaces/) <!--hide-->

View File

@ -5,6 +5,6 @@ toc_title: Native Interface (TCP)
# Native Interface (TCP) {#native-interface-tcp} # Native Interface (TCP) {#native-interface-tcp}
The native protocol is used in the [command-line client](cli.md), for inter-server communication during distributed query processing, and also in other C++ programs. Unfortunately, native ClickHouse protocol does not have formal specification yet, but it can be reverse-engineered from ClickHouse source code (starting [around here](https://github.com/ClickHouse/ClickHouse/tree/master/src/Client)) and/or by intercepting and analyzing TCP traffic. The native protocol is used in the [command-line client](../interfaces/cli.md), for inter-server communication during distributed query processing, and also in other C++ programs. Unfortunately, the native ClickHouse protocol does not have a formal specification yet, but it can be reverse-engineered from ClickHouse source code (starting [around here](https://github.com/ClickHouse/ClickHouse/tree/master/src/Client)) and/or by intercepting and analyzing TCP traffic.
[Original article](https://clickhouse.tech/docs/en/interfaces/tcp/) <!--hide--> [Original article](https://clickhouse.tech/docs/en/interfaces/tcp/) <!--hide-->

View File

@ -12,6 +12,7 @@ toc_title: Integrations
- Relational database management systems - Relational database management systems
- [MySQL](https://www.mysql.com) - [MySQL](https://www.mysql.com)
- [mysql2ch](https://github.com/long2ice/mysql2ch)
- [ProxySQL](https://github.com/sysown/proxysql/wiki/ClickHouse-Support) - [ProxySQL](https://github.com/sysown/proxysql/wiki/ClickHouse-Support)
- [clickhouse-mysql-data-reader](https://github.com/Altinity/clickhouse-mysql-data-reader) - [clickhouse-mysql-data-reader](https://github.com/Altinity/clickhouse-mysql-data-reader)
- [horgh-replicator](https://github.com/larsnovikov/horgh-replicator) - [horgh-replicator](https://github.com/larsnovikov/horgh-replicator)
@ -97,5 +98,12 @@ toc_title: Integrations
- Elixir - Elixir
- [Ecto](https://github.com/elixir-ecto/ecto) - [Ecto](https://github.com/elixir-ecto/ecto)
- [clickhouse\_ecto](https://github.com/appodeal/clickhouse_ecto) - [clickhouse\_ecto](https://github.com/appodeal/clickhouse_ecto)
- Ruby
- [Ruby on Rails](https://rubyonrails.org/)
- [activecube](https://github.com/bitquery/activecube)
- [ActiveRecord](https://github.com/PNixx/clickhouse-activerecord)
- [GraphQL](https://github.com/graphql)
- [activecube-graphql](https://github.com/bitquery/activecube-graphql)
[Original article](https://clickhouse.tech/docs/en/interfaces/third-party/integrations/) <!--hide--> [Original article](https://clickhouse.tech/docs/en/interfaces/third-party/integrations/) <!--hide-->

View File

@ -9,77 +9,77 @@ toc_title: Adopters
The following list of companies using ClickHouse and their success stories is assembled from public sources, thus might differ from current reality. Wed appreciate it if you share the story of adopting ClickHouse in your company and [add it to the list](https://github.com/ClickHouse/ClickHouse/edit/master/docs/en/introduction/adopters.md), but please make sure you wont have any NDA issues by doing so. Providing updates with publications from other companies is also useful. The following list of companies using ClickHouse and their success stories is assembled from public sources, thus might differ from current reality. Wed appreciate it if you share the story of adopting ClickHouse in your company and [add it to the list](https://github.com/ClickHouse/ClickHouse/edit/master/docs/en/introduction/adopters.md), but please make sure you wont have any NDA issues by doing so. Providing updates with publications from other companies is also useful.
| Company | Industry | Usecase | Cluster Size | (Un)Compressed Data Size<abbr title="of single replica"><sup>\*</sup></abbr> | Reference | | Company | Industry | Usecase | Cluster Size | (Un)Compressed Data Size<abbr title="of single replica"><sup>\*</sup></abbr> | Reference |
|---------------------------------------------------------------------|---------------------------------|-----------------------|------------------------------------------------------------|------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| |------------------------------------------------------------------------------------------------|---------------------------------|-----------------------|------------------------------------------------------------|------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| [2gis](https://2gis.ru){.favicon} | Maps | Monitoring | — | — | [Talk in Russian, July 2019](https://youtu.be/58sPkXfq6nw) | | <a href="https://2gis.ru" class="favicon">2gis</a> | Maps | Monitoring | — | — | [Talk in Russian, July 2019](https://youtu.be/58sPkXfq6nw) |
| [Aloha&nbsp;Browser](https://alohabrowser.com/){.favicon} | Mobile App | Browser backend | — | — | [Slides in Russian, May 2019](https://presentations.clickhouse.tech/meetup22/aloha.pdf) | | <a href="https://alohabrowser.com/" class="favicon">Aloha Browser</a> | Mobile App | Browser backend | — | — | [Slides in Russian, May 2019](https://presentations.clickhouse.tech/meetup22/aloha.pdf) |
| [Amadeus](https://amadeus.com/){.favicon} | Travel | Analytics | — | — | [Press Release, April 2018](https://www.altinity.com/blog/2018/4/5/amadeus-technologies-launches-investment-and-insights-tool-based-on-machine-learning-and-strategy-algorithms) | | <a href="https://amadeus.com/" class="favicon">Amadeus</a> | Travel | Analytics | — | — | [Press Release, April 2018](https://www.altinity.com/blog/2018/4/5/amadeus-technologies-launches-investment-and-insights-tool-based-on-machine-learning-and-strategy-algorithms) |
| [Appsflyer](https://www.appsflyer.com){.favicon} | Mobile analytics | Main product | — | — | [Talk in Russian, July 2019](https://www.youtube.com/watch?v=M3wbRlcpBbY) | | <a href="https://www.appsflyer.com" class="favicon">Appsflyer</a> | Mobile analytics | Main product | — | — | [Talk in Russian, July 2019](https://www.youtube.com/watch?v=M3wbRlcpBbY) |
| [ArenaData](https://arenadata.tech/){.favicon} | Data Platform | Main product | — | — | [Slides in Russian, December 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup38/indexes.pdf) | | <a href="https://arenadata.tech/" class="favicon">ArenaData</a> | Data Platform | Main product | — | — | [Slides in Russian, December 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup38/indexes.pdf) |
| [Badoo](https://badoo.com){.favicon} | Dating | Timeseries | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/forecast.pdf) | | <a href="https://badoo.com" class="favicon">Badoo</a> | Dating | Timeseries | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/forecast.pdf) |
| [Benocs](https://www.benocs.com/){.favicon} | Network Telemetry and Analytics | Main Product | — | — | [Slides in English, October 2017](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup9/lpm.pdf) | | <a href="https://www.benocs.com/" class="favicon">Benocs</a> | Network Telemetry and Analytics | Main Product | — | — | [Slides in English, October 2017](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup9/lpm.pdf) |
| [Bloomberg](https://www.bloomberg.com/){.favicon} | Finance, Media | Monitoring | 102 servers | — | [Slides, May 2018](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov) | | <a href="https://www.bloomberg.com/" class="favicon">Bloomberg</a> | Finance, Media | Monitoring | 102 servers | — | [Slides, May 2018](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov) |
| [Bloxy](https://bloxy.info){.favicon} | Blockchain | Analytics | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/4_bloxy.pptx) | | <a href="https://bloxy.info" class="favicon">Bloxy</a> | Blockchain | Analytics | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/4_bloxy.pptx) |
| [Dataliance for China Telecom](https://www.chinatelecomglobal.com/){.favicon} | Telecom | Analytics | — | — | [Slides in Chinese, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/telecom.pdf) | | <a href="https://www.chinatelecomglobal.com/" class="favicon">Dataliance for China Telecom</a> | Telecom | Analytics | — | — | [Slides in Chinese, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/telecom.pdf) |
| [CARTO](https://carto.com/){.favicon} | Business Intelligence | Geo analytics | — | — | [Geospatial processing with ClickHouse](https://carto.com/blog/geospatial-processing-with-clickhouse/) | | <a href="https://carto.com/" class="favicon">CARTO</a> | Business Intelligence | Geo analytics | — | — | [Geospatial processing with ClickHouse](https://carto.com/blog/geospatial-processing-with-clickhouse/) |
| [CERN](http://public.web.cern.ch/public/){.favicon} | Research | Experiment | — | — | [Press release, April 2012](https://www.yandex.com/company/press_center/press_releases/2012/2012-04-10/) | | <a href="http://public.web.cern.ch/public/" class="favicon">CERN</a> | Research | Experiment | — | — | [Press release, April 2012](https://www.yandex.com/company/press_center/press_releases/2012/2012-04-10/) |
| [Cisco](http://cisco.com/){.favicon} | Networking | Traffic analysis | — | — | [Lightning talk, October 2019](https://youtu.be/-hI1vDR2oPY?t=5057) | | <a href="http://cisco.com/" class="favicon">Cisco</a> | Networking | Traffic analysis | — | — | [Lightning talk, October 2019](https://youtu.be/-hI1vDR2oPY?t=5057) |
| [Citadel&nbsp;Securities](https://www.citadelsecurities.com/){.favicon} | Finance | — | — | — | [Contribution, March 2019](https://github.com/ClickHouse/ClickHouse/pull/4774) | | <a href="https://www.citadelsecurities.com/" class="favicon">Citadel Securities</a> | Finance | — | — | — | [Contribution, March 2019](https://github.com/ClickHouse/ClickHouse/pull/4774) |
| [Citymobil](https://city-mobil.ru){.favicon} | Taxi | Analytics | — | — | [Blog Post in Russian, March 2020](https://habr.com/en/company/citymobil/blog/490660/) | | <a href="https://city-mobil.ru" class="favicon">Citymobil</a> | Taxi | Analytics | — | — | [Blog Post in Russian, March 2020](https://habr.com/en/company/citymobil/blog/490660/) |
| [ContentSquare](https://contentsquare.com){.favicon} | Web analytics | Main product | — | — | [Blog post in French, November 2018](http://souslecapot.net/2018/11/21/patrick-chatain-vp-engineering-chez-contentsquare-penser-davantage-amelioration-continue-que-revolution-constante/) | | <a href="https://contentsquare.com" class="favicon">ContentSquare</a> | Web analytics | Main product | — | — | [Blog post in French, November 2018](http://souslecapot.net/2018/11/21/patrick-chatain-vp-engineering-chez-contentsquare-penser-davantage-amelioration-continue-que-revolution-constante/) |
| [Cloudflare](https://cloudflare.com){.favicon} | CDN | Traffic analysis | 36 servers | — | [Blog post, May 2017](https://blog.cloudflare.com/how-cloudflare-analyzes-1m-dns-queries-per-second/), [Blog post, March 2018](https://blog.cloudflare.com/http-analytics-for-6m-requests-per-second-using-clickhouse/) | | <a href="https://cloudflare.com" class="favicon">Cloudflare</a> | CDN | Traffic analysis | 36 servers | — | [Blog post, May 2017](https://blog.cloudflare.com/how-cloudflare-analyzes-1m-dns-queries-per-second/), [Blog post, March 2018](https://blog.cloudflare.com/http-analytics-for-6m-requests-per-second-using-clickhouse/) |
| [Corunet](https://coru.net/){.favicon} | Analytics | Main product | — | — | [Slides in English, April 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup21/predictive_models.pdf) | | <a href="https://coru.net/" class="favicon">Corunet</a> | Analytics | Main product | — | — | [Slides in English, April 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup21/predictive_models.pdf) |
| [CraiditX 氪信](https://www.creditx.com){.favicon} | Finance AI | Analysis | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/udf.pptx) | | <a href="https://www.creditx.com" class="favicon">CraiditX 氪信</a> | Finance AI | Analysis | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/udf.pptx) |
| [Criteo](https://www.criteo.com/){.favicon} | Retail | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/3_storetail.pptx) | | <a href="https://www.criteo.com/" class="favicon">Criteo</a> | Retail | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/3_storetail.pptx) |
| [Deutsche&nbsp;Bank](https://db.com){.favicon} | Finance | BI Analytics | — | — | [Slides in English, October 2019](https://bigdatadays.ru/wp-content/uploads/2019/10/D2-H3-3_Yakunin-Goihburg.pdf) | | <a href="https://db.com" class="favicon">Deutsche Bank</a> | Finance | BI Analytics | — | — | [Slides in English, October 2019](https://bigdatadays.ru/wp-content/uploads/2019/10/D2-H3-3_Yakunin-Goihburg.pdf) |
| [Diva-e](https://www.diva-e.com){.favicon} | Digital consulting | Main Product | — | — | [Slides in English, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup29/ClickHouse-MeetUp-Unusual-Applications-sd-2019-09-17.pdf) | | <a href="https://www.diva-e.com" class="favicon">Diva-e</a> | Digital consulting | Main Product | — | — | [Slides in English, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup29/ClickHouse-MeetUp-Unusual-Applications-sd-2019-09-17.pdf) |
| [Exness](https://www.exness.com){.favicon} | Trading | Metrics, Logging | — | — | [Talk in Russian, May 2019](https://youtu.be/_rpU-TvSfZ8?t=3215) | | <a href="https://www.exness.com" class="favicon">Exness</a> | Trading | Metrics, Logging | — | — | [Talk in Russian, May 2019](https://youtu.be/_rpU-TvSfZ8?t=3215) |
| [Geniee](https://geniee.co.jp){.favicon} | Ad network | Main product | — | — | [Blog post in Japanese, July 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) | | <a href="https://geniee.co.jp" class="favicon">Geniee</a> | Ad network | Main product | — | — | [Blog post in Japanese, July 2017](https://tech.geniee.co.jp/entry/2017/07/20/160100) |
| [HUYA](https://www.huya.com/){.favicon} | Video Streaming | Analytics | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) | | <a href="https://www.huya.com/" class="favicon">HUYA</a> | Video Streaming | Analytics | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/7.%20ClickHouse万亿数据分析实践%20李本旺(sundy-li)%20虎牙.pdf) |
| [Idealista](https://www.idealista.com){.favicon} | Real Estate | Analytics | — | — | [Blog Post in English, April 2019](https://clickhouse.yandex/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) | | <a href="https://www.idealista.com" class="favicon">Idealista</a> | Real Estate | Analytics | — | — | [Blog Post in English, April 2019](https://clickhouse.tech/blog/en/clickhouse-meetup-in-madrid-on-april-2-2019) |
| [Infovista](https://www.infovista.com/){.favicon} | Networks | Analytics | — | — | [Slides in English, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup30/infovista.pdf) | | <a href="https://www.infovista.com/" class="favicon">Infovista</a> | Networks | Analytics | — | — | [Slides in English, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup30/infovista.pdf) |
| [InnoGames](https://www.innogames.com){.favicon} | Games | Metrics, Logging | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/graphite_and_clickHouse.pdf) | | <a href="https://www.innogames.com" class="favicon">InnoGames</a> | Games | Metrics, Logging | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/graphite_and_clickHouse.pdf) |
| [Integros](https://integros.com){.favicon} | Platform for video services | Analytics | — | — | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) | | <a href="https://integros.com" class="favicon">Integros</a> | Platform for video services | Analytics | — | — | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) |
| [Kodiak Data](https://www.kodiakdata.com/){.favicon} | Clouds | Main product | — | — | [Slides in Engish, April 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup13/kodiak_data.pdf) | | <a href="https://www.kodiakdata.com/" class="favicon">Kodiak Data</a> | Clouds | Main product | — | — | [Slides in English, April 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup13/kodiak_data.pdf) |
| [Kontur](https://kontur.ru){.favicon} | Software Development | Metrics | — | — | [Talk in Russian, November 2018](https://www.youtube.com/watch?v=U4u4Bd0FtrY) | | <a href="https://kontur.ru" class="favicon">Kontur</a> | Software Development | Metrics | — | — | [Talk in Russian, November 2018](https://www.youtube.com/watch?v=U4u4Bd0FtrY) |
| [Lawrence Berkeley National Laboratory](https://www.lbl.gov){.favicon} | Research | Traffic analysis | 1 server | 11.8 TiB | [Slides in English, April 2019](https://www.smitasin.com/presentations/2019-04-17_DOE-NSM.pdf) | | <a href="https://www.lbl.gov" class="favicon">Lawrence Berkeley National Laboratory</a> | Research | Traffic analysis | 1 server | 11.8 TiB | [Slides in English, April 2019](https://www.smitasin.com/presentations/2019-04-17_DOE-NSM.pdf) |
| [LifeStreet](https://lifestreet.com/){.favicon} | Ad network | Main product | 75 servers (3 replicas) | 5.27 PiB | [Blog post in Russian, February 2017](https://habr.com/en/post/322620/) | | <a href="https://lifestreet.com/" class="favicon">LifeStreet</a> | Ad network | Main product | 75 servers (3 replicas) | 5.27 PiB | [Blog post in Russian, February 2017](https://habr.com/en/post/322620/) |
| [Mail.ru Cloud Solutions](https://mcs.mail.ru/){.favicon} | Cloud services | Main product | — | — | [Article in Russian](https://mcs.mail.ru/help/db-create/clickhouse#) | | <a href="https://mcs.mail.ru/" class="favicon">Mail.ru Cloud Solutions</a> | Cloud services | Main product | — | — | [Article in Russian](https://mcs.mail.ru/help/db-create/clickhouse#) |
| [MessageBird](https://www.messagebird.com){.favicon} | Telecommunications | Statistics | — | — | [Slides in English, November 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup20/messagebird.pdf) | | <a href="https://www.messagebird.com" class="favicon">MessageBird</a> | Telecommunications | Statistics | — | — | [Slides in English, November 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup20/messagebird.pdf) |
| [MGID](https://www.mgid.com/){.favicon} | Ad network | Web-analytics | — | — | [Blog post in Russian, April 2020](http://gs-studio.com/news-about-it/32777----clickhouse---c) | | <a href="https://www.mgid.com/" class="favicon">MGID</a> | Ad network | Web-analytics | — | — | [Blog post in Russian, April 2020](http://gs-studio.com/news-about-it/32777----clickhouse---c) |
| [OneAPM](https://www.oneapm.com/){.favicon} | Monitorings and Data Analysis | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/8.%20clickhouse在OneAPM的应用%20杜龙.pdf) | | <a href="https://www.oneapm.com/" class="favicon">OneAPM</a> | Monitorings and Data Analysis | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/8.%20clickhouse在OneAPM的应用%20杜龙.pdf) |
| [Pragma&nbsp;Innovation](http://www.pragma-innovation.fr/){.favicon} | Telemetry and Big Data Analysis | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/4_pragma_innovation.pdf) | | <a href="http://www.pragma-innovation.fr/" class="favicon">Pragma Innovation</a> | Telemetry and Big Data Analysis | Main product | — | — | [Slides in English, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup18/4_pragma_innovation.pdf) |
| [QINGCLOUD](https://www.qingcloud.com/){.favicon} | Cloud services | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/4.%20Cloud%20%2B%20TSDB%20for%20ClickHouse%20张健%20QingCloud.pdf) | | <a href="https://www.qingcloud.com/" class="favicon">QINGCLOUD</a> | Cloud services | Main product | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/4.%20Cloud%20%2B%20TSDB%20for%20ClickHouse%20张健%20QingCloud.pdf) |
| [Qrator](https://qrator.net){.favicon} | DDoS protection | Main product | — | — | [Blog Post, March 2019](https://blog.qrator.net/en/clickhouse-ddos-mitigation_37/) | | <a href="https://qrator.net" class="favicon">Qrator</a> | DDoS protection | Main product | — | — | [Blog Post, March 2019](https://blog.qrator.net/en/clickhouse-ddos-mitigation_37/) |
| [Percent 百分点](https://www.percent.cn/){.favicon} | Analytics | Main Product | — | — | [Slides in Chinese, June 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf) | | <a href="https://www.percent.cn/" class="favicon">Percent 百分点</a> | Analytics | Main Product | — | — | [Slides in Chinese, June 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/4.%20ClickHouse万亿数据双中心的设计与实践%20.pdf) |
| [Rambler](https://rambler.ru){.favicon} | Internet services | Analytics | — | — | [Talk in Russian, April 2018](https://medium.com/@ramblertop/разработка-api-clickhouse-для-рамблер-топ-100-f4c7e56f3141) | | <a href="https://rambler.ru" class="favicon">Rambler</a> | Internet services | Analytics | — | — | [Talk in Russian, April 2018](https://medium.com/@ramblertop/разработка-api-clickhouse-для-рамблер-топ-100-f4c7e56f3141) |
| [Tencent](https://www.tencent.com){.favicon} | Messaging | Logging | — | — | [Talk in Chinese, November 2019](https://youtu.be/T-iVQRuw-QY?t=5050) | | <a href="https://www.tencent.com" class="favicon">Tencent</a> | Messaging | Logging | — | — | [Talk in Chinese, November 2019](https://youtu.be/T-iVQRuw-QY?t=5050) |
| [Traffic&nbsp;Stars](https://trafficstars.com/){.favicon} | AD network | — | — | — | [Slides in Russian, May 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup15/lightning/ninja.pdf) | | <a href="https://trafficstars.com/" class="favicon">Traffic Stars</a> | AD network | — | — | — | [Slides in Russian, May 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup15/lightning/ninja.pdf) |
| [S7&nbsp;Airlines](https://www.s7.ru){.favicon} | Airlines | Metrics, Logging | — | — | [Talk in Russian, March 2019](https://www.youtube.com/watch?v=nwG68klRpPg&t=15s) | | <a href="https://www.s7.ru" class="favicon">S7 Airlines</a> | Airlines | Metrics, Logging | — | — | [Talk in Russian, March 2019](https://www.youtube.com/watch?v=nwG68klRpPg&t=15s) |
| [SEMrush](https://www.semrush.com/){.favicon} | Marketing | Main product | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/5_semrush.pdf) | | <a href="https://www.semrush.com/" class="favicon">SEMrush</a> | Marketing | Main product | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/5_semrush.pdf) |
| [scireum&nbsp;GmbH](https://www.scireum.de/){.favicon} | e-Commerce | Main product | — | — | [Talk in German, February 2020](https://www.youtube.com/watch?v=7QWAn5RbyR4) | | <a href="https://www.scireum.de/" class="favicon">scireum GmbH</a> | e-Commerce | Main product | — | — | [Talk in German, February 2020](https://www.youtube.com/watch?v=7QWAn5RbyR4) |
| [Sentry](https://sentry.io/){.favicon} | Software Development | Main product | — | — | [Blog Post in English, May 2019](https://blog.sentry.io/2019/05/16/introducing-snuba-sentrys-new-search-infrastructure) | | <a href="https://sentry.io/" class="favicon">Sentry</a> | Software Development | Main product | — | — | [Blog Post in English, May 2019](https://blog.sentry.io/2019/05/16/introducing-snuba-sentrys-new-search-infrastructure) |
| [SGK](http://www.sgk.gov.tr/wps/portal/sgk/tr){.favicon} | Goverment Social Security | Analytics | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/ClickHouse%20Meetup-Ramazan%20POLAT.pdf) | | <a href="http://www.sgk.gov.tr/wps/portal/sgk/tr" class="favicon">SGK</a> | Government Social Security | Analytics | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/ClickHouse%20Meetup-Ramazan%20POLAT.pdf) |
| [seo.do](https://seo.do/){.favicon} | Analytics | Main product | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/CH%20Presentation-%20Metehan%20Çetinkaya.pdf) | | <a href="https://seo.do/" class="favicon">seo.do</a> | Analytics | Main product | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup35/CH%20Presentation-%20Metehan%20Çetinkaya.pdf) |
| [Sina](http://english.sina.com/index.html){.favicon} | News | — | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/6.%20ClickHouse最佳实践%20高鹏_新浪.pdf) | | <a href="http://english.sina.com/index.html" class="favicon">Sina</a> | News | — | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/6.%20ClickHouse最佳实践%20高鹏_新浪.pdf) |
| [SMI2](https://smi2.ru/){.favicon} | News | Analytics | — | — | [Blog Post in Russian, November 2017](https://habr.com/ru/company/smi2/blog/314558/) | | <a href="https://smi2.ru/" class="favicon">SMI2</a> | News | Analytics | — | — | [Blog Post in Russian, November 2017](https://habr.com/ru/company/smi2/blog/314558/) |
| [Splunk](https://www.splunk.com/){.favicon} | Business Analytics | Main product | — | — | [Slides in English, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/splunk.pdf) | | <a href="https://www.splunk.com/" class="favicon">Splunk</a> | Business Analytics | Main product | — | — | [Slides in English, January 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup12/splunk.pdf) |
| [Spotify](https://www.spotify.com){.favicon} | Music | Experimentation | — | — | [Slides, July 2018](https://www.slideshare.net/glebus/using-clickhouse-for-experimentation-104247173) | | <a href="https://www.spotify.com" class="favicon">Spotify</a> | Music | Experimentation | — | — | [Slides, July 2018](https://www.slideshare.net/glebus/using-clickhouse-for-experimentation-104247173) |
| [Tencent](https://www.tencent.com){.favicon} | Big Data | Data processing | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/5.%20ClickHouse大数据集群应用_李俊飞腾讯网媒事业部.pdf) | | <a href="https://www.tencent.com" class="favicon">Tencent</a> | Big Data | Data processing | — | — | [Slides in Chinese, October 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup19/5.%20ClickHouse大数据集群应用_李俊飞腾讯网媒事业部.pdf) |
| [Uber](https://www.uber.com){.favicon} | Taxi | Logging | — | — | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/uber.pdf) | | <a href="https://www.uber.com" class="favicon">Uber</a> | Taxi | Logging | — | — | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/uber.pdf) |
| [VKontakte](https://vk.com){.favicon} | Social Network | Statistics, Logging | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/3_vk.pdf) | | <a href="https://vk.com" class="favicon">VKontakte</a> | Social Network | Statistics, Logging | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/3_vk.pdf) |
| [Wisebits](https://wisebits.com/){.favicon} | IT Solutions | Analytics | — | — | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) | | <a href="https://wisebits.com/" class="favicon">Wisebits</a> | IT Solutions | Analytics | — | — | [Slides in Russian, May 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup22/strategies.pdf) |
| [Xiaoxin&nbsp;Tech](http://www.xiaoxintech.cn/){.favicon} | Education | Common purpose | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/sync-clickhouse-with-mysql-mongodb.pptx) | | <a href="http://www.xiaoxintech.cn/" class="favicon">Xiaoxin Tech</a> | Education | Common purpose | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/sync-clickhouse-with-mysql-mongodb.pptx) |
| [Ximalaya](https://www.ximalaya.com/){.favicon} | Audio sharing | OLAP | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/ximalaya.pdf) | | <a href="https://www.ximalaya.com/" class="favicon">Ximalaya</a> | Audio sharing | OLAP | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/ximalaya.pdf) |
| [Yandex&nbsp;Cloud](https://cloud.yandex.ru/services/managed-clickhouse){.favicon} | Public Cloud | Main product | — | — | [Talk in Russian, December 2019](https://www.youtube.com/watch?v=pgnak9e_E0o) | | <a href="https://cloud.yandex.ru/services/managed-clickhouse" class="favicon">Yandex Cloud</a> | Public Cloud | Main product | — | — | [Talk in Russian, December 2019](https://www.youtube.com/watch?v=pgnak9e_E0o) |
| [Yandex&nbsp;DataLens](https://cloud.yandex.ru/services/datalens){.favicon} | Business Intelligence | Main product | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/datalens.pdf) | | <a href="https://cloud.yandex.ru/services/datalens" class="favicon">Yandex DataLens</a> | Business Intelligence | Main product | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/datalens.pdf) |
| [Yandex&nbsp;Market](https://market.yandex.ru/){.favicon} | e-Commerce | Metrics, Logging | — | — | [Talk in Russian, January 2019](https://youtu.be/_l1qP0DyBcA?t=478) | | <a href="https://market.yandex.ru/" class="favicon">Yandex Market</a> | e-Commerce | Metrics, Logging | — | — | [Talk in Russian, January 2019](https://youtu.be/_l1qP0DyBcA?t=478) |
| [Yandex&nbsp;Metrica](https://metrica.yandex.com){.favicon} | Web analytics | Main product | 360 servers in one cluster, 1862 servers in one department | 66.41 PiB / 5.68 PiB | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/introduction/#13) | | <a href="https://metrica.yandex.com" class="favicon">Yandex Metrica</a> | Web analytics | Main product | 360 servers in one cluster, 1862 servers in one department | 66.41 PiB / 5.68 PiB | [Slides, February 2020](https://presentations.clickhouse.tech/meetup40/introduction/#13) |
| [ЦВТ](https://htc-cs.ru/){.favicon} | Software Development | Metrics, Logging | — | — | [Blog Post, March 2019, in Russian](https://vc.ru/dev/62715-kak-my-stroili-monitoring-na-prometheus-clickhouse-i-elk) | | <a href="https://htc-cs.ru/" class="favicon">ЦВТ</a> | Software Development | Metrics, Logging | — | — | [Blog Post, March 2019, in Russian](https://vc.ru/dev/62715-kak-my-stroili-monitoring-na-prometheus-clickhouse-i-elk) |
| [МКБ](https://mkb.ru/){.favicon} | Bank | Web-system monitoring | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/mkb.pdf) | | <a href="https://mkb.ru/" class="favicon">МКБ</a> | Bank | Web-system monitoring | — | — | [Slides in Russian, September 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup28/mkb.pdf) |
| [Jinshuju 金数据](https://jinshuju.net){.favicon} | BI Analytics | Main product | — | — | [Slides in Chinese, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/3.%20金数据数据架构调整方案Public.pdf) | | <a href="https://jinshuju.net" class="favicon">Jinshuju 金数据</a> | BI Analytics | Main product | — | — | [Slides in Chinese, October 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup24/3.%20金数据数据架构调整方案Public.pdf) |
| [Instana](https://www.instana.com){.favicon} | APM Platform | Main product | — | — | [Twitter post](https://twitter.com/mieldonkers/status/1248884119158882304) | | <a href="https://www.instana.com" class="favicon">Instana</a> | APM Platform | Main product | — | — | [Twitter post](https://twitter.com/mieldonkers/status/1248884119158882304) |
| [Wargaming](https://wargaming.com/en/){.favicon} | Games | | — | — | [Interview](https://habr.com/en/post/496954/) | | <a href="https://wargaming.com/en/" class="favicon">Wargaming</a> | Games | | — | — | [Interview](https://habr.com/en/post/496954/) |
| [Crazypanda](https://crazypanda.ru/en/){.favicon} | Games | | — | — | Live session on ClickHouse meetup | | <a href="https://crazypanda.ru/en/" class="favicon">Crazypanda</a> | Games | | — | — | Live session on ClickHouse meetup |
| [FunCorp](https://fun.co/rp){.favicon} | Games | | — | — | [Article](https://www.altinity.com/blog/migrating-from-redshift-to-clickhouse) | | <a href="https://fun.co/rp" class="favicon">FunCorp</a> | Games | | — | — | [Article](https://www.altinity.com/blog/migrating-from-redshift-to-clickhouse) |
[Original article](https://clickhouse.tech/docs/en/introduction/adopters/) <!--hide--> [Original article](https://clickhouse.tech/docs/en/introduction/adopters/) <!--hide-->

View File

@ -5,7 +5,7 @@ toc_title: Distinctive Features
# Distinctive Features of ClickHouse {#distinctive-features-of-clickhouse} # Distinctive Features of ClickHouse {#distinctive-features-of-clickhouse}
## True Column-Oriented DBMS {#true-column-oriented-dbms} ## True Column-Oriented Database Management System {#true-column-oriented-dbms}
In a true column-oriented DBMS, no extra data is stored with the values. Among other things, this means that constant-length values must be supported, to avoid storing their length “number” next to the values. As an example, a billion UInt8-type values should consume around 1 GB uncompressed, otherwise this strongly affects the CPU use. It is essential to store data compactly (without any “garbage”) even when uncompressed, since the speed of decompression (CPU usage) depends mainly on the volume of uncompressed data. In a true column-oriented DBMS, no extra data is stored with the values. Among other things, this means that constant-length values must be supported, to avoid storing their length “number” next to the values. As an example, a billion UInt8-type values should consume around 1 GB uncompressed, otherwise this strongly affects the CPU use. It is essential to store data compactly (without any “garbage”) even when uncompressed, since the speed of decompression (CPU usage) depends mainly on the volume of uncompressed data.
@ -15,11 +15,15 @@ Its also worth noting that ClickHouse is a database management system, not a
## Data Compression {#data-compression} ## Data Compression {#data-compression}
Some column-oriented DBMSs (InfiniDB CE and MonetDB) do not use data compression. However, data compression does play a key role in achieving excellent performance. Some column-oriented DBMSs do not use data compression. However, data compression does play a key role in achieving excellent performance.
In addition to efficient general-purpose compression codecs with different trade-offs between disk space and CPU consumption, ClickHouse provides [specialized codecs](../sql-reference/statements/create.md#create-query-specialized-codecs) for specific kinds of data, which allow ClickHouse to compete with and outperform more niche databases, like time-series ones.
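As an illustrative sketch (the table and column names are made up; `Delta`, `Gorilla` and `LZ4` stand in for the specialized and general-purpose codecs this section refers to), column-level codecs can be declared like this:

```
# Hypothetical table; Delta + LZ4 and Gorilla illustrate chained and specialized codecs.
clickhouse-client --query "
    CREATE TABLE codec_example
    (
        ts DateTime CODEC(Delta, LZ4),
        value Float64 CODEC(Gorilla)
    )
    ENGINE = MergeTree
    ORDER BY ts"
```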
## Disk Storage of Data {#disk-storage-of-data} ## Disk Storage of Data {#disk-storage-of-data}
Keeping data physically sorted by primary key makes it possible to extract data for its specific values or value ranges with low latency, less than a few dozen milliseconds. Some column-oriented DBMSs (such as SAP HANA and Google PowerDrill) can only work in RAM. This approach encourages the allocation of a larger hardware budget than is necessary for real-time analysis. ClickHouse is designed to work on regular hard drives, which means the cost per GB of data storage is low, but SSD and additional RAM are also fully used if available. Keeping data physically sorted by primary key makes it possible to extract data for its specific values or value ranges with low latency, less than a few dozen milliseconds. Some column-oriented DBMSs (such as SAP HANA and Google PowerDrill) can only work in RAM. This approach encourages the allocation of a larger hardware budget than is necessary for real-time analysis.
ClickHouse is designed to work on regular hard drives, which means the cost per GB of data storage is low, but SSD and additional RAM are also fully used if available.
## Parallel Processing on Multiple Cores {#parallel-processing-on-multiple-cores} ## Parallel Processing on Multiple Cores {#parallel-processing-on-multiple-cores}
@ -28,15 +32,18 @@ Large queries are parallelized naturally, taking all the necessary resources ava
## Distributed Processing on Multiple Servers {#distributed-processing-on-multiple-servers} ## Distributed Processing on Multiple Servers {#distributed-processing-on-multiple-servers}
Almost none of the columnar DBMSs mentioned above have support for distributed query processing. Almost none of the columnar DBMSs mentioned above have support for distributed query processing.
In ClickHouse, data can reside on different shards. Each shard can be a group of replicas used for fault tolerance. All shards are used to run a query in parallel, transparently for the user. In ClickHouse, data can reside on different shards. Each shard can be a group of replicas used for fault tolerance. All shards are used to run a query in parallel, transparently for the user.
## SQL Support {#sql-support} ## SQL Support {#sql-support}
ClickHouse supports a declarative query language based on SQL that is identical to the SQL standard in many cases. ClickHouse supports a [declarative query language based on SQL](../sql-reference/index.md) that is identical to the ANSI SQL standard in [many cases](../sql-reference/ansi.md).
Supported queries include GROUP BY, ORDER BY, subqueries in FROM, IN, and JOIN clauses, and scalar subqueries.
Dependent subqueries and window functions are not supported.
## Vector Engine {#vector-engine} Supported queries include [GROUP BY](../sql-reference/statements/select/group-by.md), [ORDER BY](../sql-reference/statements/select/order-by.md), subqueries in [FROM](../sql-reference/statements/select/from.md), [JOIN](../sql-reference/statements/select/join.md) clause, [IN](../sql-reference/operators/in.md) operator, and scalar subqueries.
Correlated (dependent) subqueries and window functions are not supported at the time of writing but might become available in the future.
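A short sketch of the kind of query this covers, assuming a hypothetical `hits` table with `event_time` and `user_id` columns; it combines a subquery in FROM, the IN operator, GROUP BY and ORDER BY:

```
# Hypothetical hits table; one query using a FROM subquery, IN, GROUP BY and ORDER BY.
clickhouse-client --query "
    SELECT toDate(event_time) AS day, count() AS requests
    FROM (SELECT event_time FROM hits WHERE user_id IN (1, 2, 3))
    GROUP BY day
    ORDER BY day"
```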
## Vector Computation Engine {#vector-engine}
Data is not only stored by columns but is processed by vectors (parts of columns), which allows achieving high CPU efficiency. Data is not only stored by columns but is processed by vectors (parts of columns), which allows achieving high CPU efficiency.
@ -44,13 +51,19 @@ Data is not only stored by columns but is processed by vectors (parts of columns
ClickHouse supports tables with a primary key. To quickly perform queries on the range of the primary key, the data is sorted incrementally using the merge tree. Due to this, data can continually be added to the table. No locks are taken when new data is ingested. ClickHouse supports tables with a primary key. To quickly perform queries on the range of the primary key, the data is sorted incrementally using the merge tree. Due to this, data can continually be added to the table. No locks are taken when new data is ingested.
## Index {#index} ## Primary Index {#primary-index}
Having data physically sorted by primary key makes it possible to extract data for its specific values or value ranges with low latency, less than a few dozen milliseconds. Having data physically sorted by primary key makes it possible to extract data for its specific values or value ranges with low latency, less than a few dozen milliseconds.
## Secondary Indexes {#secondary-indexes}
Unlike other database management systems, secondary indexes in ClickHouse do not point to specific rows or row ranges. Instead, they let the database know in advance that all rows in some data parts would not match the query filtering conditions, so those parts are not read at all; this is why they are called [data skipping indexes](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-data_skipping-indexes).
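As a hedged sketch (the `hits` table, its `URL` column, and the MergeTree engine are assumptions for illustration), a minmax skipping index could be added like this:

```
# Hypothetical MergeTree table hits; adds a minmax data skipping index over length(URL).
clickhouse-client --query "
    ALTER TABLE hits
    ADD INDEX url_length_idx length(URL) TYPE minmax GRANULARITY 4"
```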
## Suitable for Online Queries {#suitable-for-online-queries} ## Suitable for Online Queries {#suitable-for-online-queries}
Low latency means that queries can be processed without delay and without trying to prepare an answer in advance, right at the same moment while the user interface page is loading. In other words, online. Most OLAP database management systems don't aim for online queries with sub-second latencies. In alternative systems, report building time of tens of seconds or even minutes is often considered acceptable. Sometimes it takes even longer, which forces reports to be prepared offline (in advance or by responding with "come back later").
In ClickHouse low latency means that queries can be processed without delay and without trying to prepare an answer in advance, right at the same moment while the user interface page is loading. In other words, online.
## Support for Approximated Calculations {#support-for-approximated-calculations} ## Support for Approximated Calculations {#support-for-approximated-calculations}
@ -60,16 +73,24 @@ ClickHouse provides various ways to trade accuracy for performance:
2. Running a query based on a part (sample) of data and getting an approximated result. In this case, proportionally less data is retrieved from the disk. 2. Running a query based on a part (sample) of data and getting an approximated result. In this case, proportionally less data is retrieved from the disk (see the sketch after this list).
3. Running an aggregation for a limited number of random keys, instead of for all keys. Under certain conditions for key distribution in the data, this provides a reasonably accurate result while using fewer resources. 3. Running an aggregation for a limited number of random keys, instead of for all keys. Under certain conditions for key distribution in the data, this provides a reasonably accurate result while using fewer resources.
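Point 2 above, for example, corresponds to the `SAMPLE` clause; a minimal sketch, assuming a hypothetical `hits` table whose MergeTree definition includes a `SAMPLE BY` expression:

```
# Hypothetical hits table created with SAMPLE BY; reads roughly a 10% sample of the data.
clickhouse-client --query "
    SELECT count() AS sampled_rows
    FROM hits
    SAMPLE 0.1"
```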
## Adaptive Join Algorithm
ClickHouse adaptively chooses how to [JOIN](../sql-reference/statements/select/join.md) multiple tables, preferring the hash-join algorithm and falling back to the merge-join algorithm if there is more than one large table.
## Data Replication and Data Integrity Support {#data-replication-and-data-integrity-support} ## Data Replication and Data Integrity Support {#data-replication-and-data-integrity-support}
ClickHouse uses asynchronous multi-master replication. After being written to any available replica, all the remaining replicas retrieve their copy in the background. The system maintains identical data on different replicas. Recovery after most failures is performed automatically, or semi-automatically in complex cases. ClickHouse uses asynchronous multi-master replication. After being written to any available replica, all the remaining replicas retrieve their copy in the background. The system maintains identical data on different replicas. Recovery after most failures is performed automatically, or semi-automatically in complex cases.
For more information, see the section [Data replication](../engines/table-engines/mergetree-family/replication.md). For more information, see the section [Data replication](../engines/table-engines/mergetree-family/replication.md).
## Role-Based Access Control
ClickHouse implements user account management using SQL queries and allows for [role-based access control configuration](../operations/access-rights.md) similar to what can be found in the ANSI SQL standard and popular relational database management systems.
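A hedged sketch of that SQL-driven workflow (role, database and user names are made up, and SQL-driven access management typically has to be enabled for at least one bootstrap user first):

```
# Role, database and user names are illustrative only.
clickhouse-client --query "CREATE ROLE analyst"
clickhouse-client --query "GRANT SELECT ON reports.* TO analyst"
clickhouse-client --query "CREATE USER alice IDENTIFIED WITH sha256_password BY 'secret'"
clickhouse-client --query "GRANT analyst TO alice"
```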
## Features that Can Be Considered Disadvantages {#clickhouse-features-that-can-be-considered-disadvantages} ## Features that Can Be Considered Disadvantages {#clickhouse-features-that-can-be-considered-disadvantages}
1. No full-fledged transactions. 1. No full-fledged transactions.
2. Lack of ability to modify or delete already inserted data with high rate and low latency. There are batch deletes and updates available to clean up or modify data, for example to comply with [GDPR](https://gdpr-info.eu). 2. Lack of ability to modify or delete already inserted data with a high rate and low latency. There are batch deletes and updates available to clean up or modify data, for example, to comply with [GDPR](https://gdpr-info.eu).
3. The sparse index makes ClickHouse not so suitable for point queries retrieving single rows by their keys. 3. The sparse index makes ClickHouse not so efficient for point queries retrieving single rows by their keys.
[Original article](https://clickhouse.tech/docs/en/introduction/distinctive_features/) <!--hide--> [Original article](https://clickhouse.tech/docs/en/introduction/distinctive-features/) <!--hide-->
