Merge remote-tracking branch 'origin/master' into HEAD

This commit is contained in:
Alexander Kuzmenkov 2020-06-26 21:01:40 +03:00
commit c8b633fe47
518 changed files with 9919 additions and 4063 deletions

1
.gitignore vendored
View File

@ -12,6 +12,7 @@
/build /build
/build_* /build_*
/build-* /build-*
/tests/venv
/docs/build /docs/build
/docs/publish /docs/publish

3
.gitmodules vendored
View File

@ -168,3 +168,6 @@
[submodule "contrib/fmtlib"] [submodule "contrib/fmtlib"]
path = contrib/fmtlib path = contrib/fmtlib
url = https://github.com/fmtlib/fmt.git url = https://github.com/fmtlib/fmt.git
[submodule "contrib/sentry-native"]
path = contrib/sentry-native
url = https://github.com/getsentry/sentry-native.git

View File

@ -289,8 +289,9 @@ set (CMAKE_POSTFIX_VARIABLE "CMAKE_${CMAKE_BUILD_TYPE_UC}_POSTFIX")
if (MAKE_STATIC_LIBRARIES) if (MAKE_STATIC_LIBRARIES)
set (CMAKE_POSITION_INDEPENDENT_CODE OFF) set (CMAKE_POSITION_INDEPENDENT_CODE OFF)
if (OS_LINUX) if (OS_LINUX AND NOT ARCH_ARM)
# Slightly more efficient code can be generated # Slightly more efficient code can be generated
# It's disabled for ARM because otherwise ClickHouse cannot run on Android.
set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -fno-pie") set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -fno-pie")
set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -fno-pie") set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -fno-pie")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-no-pie") set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-no-pie")
@ -361,6 +362,7 @@ include (cmake/find/orc.cmake)
include (cmake/find/avro.cmake) include (cmake/find/avro.cmake)
include (cmake/find/msgpack.cmake) include (cmake/find/msgpack.cmake)
include (cmake/find/cassandra.cmake) include (cmake/find/cassandra.cmake)
include (cmake/find/sentry.cmake)
find_contrib_lib(cityhash) find_contrib_lib(cityhash)
find_contrib_lib(farmhash) find_contrib_lib(farmhash)

View File

@ -1,4 +1,5 @@
#include <daemon/BaseDaemon.h> #include <daemon/BaseDaemon.h>
#include <daemon/SentryWriter.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <sys/types.h> #include <sys/types.h>
@ -288,7 +289,7 @@ private:
std::stringstream bare_stacktrace; std::stringstream bare_stacktrace;
bare_stacktrace << "Stack trace:"; bare_stacktrace << "Stack trace:";
for (size_t i = stack_trace.getOffset(); i < stack_trace.getSize(); ++i) for (size_t i = stack_trace.getOffset(); i < stack_trace.getSize(); ++i)
bare_stacktrace << ' ' << stack_trace.getFrames()[i]; bare_stacktrace << ' ' << stack_trace.getFramePointers()[i];
LOG_FATAL(log, bare_stacktrace.str()); LOG_FATAL(log, bare_stacktrace.str());
} }
@ -296,9 +297,20 @@ private:
/// Write symbolized stack trace line by line for better grep-ability. /// Write symbolized stack trace line by line for better grep-ability.
stack_trace.toStringEveryLine([&](const std::string & s) { LOG_FATAL(log, s); }); stack_trace.toStringEveryLine([&](const std::string & s) { LOG_FATAL(log, s); });
/// Send crash report to developers (if configured)
#if defined(__ELF__) && !defined(__FreeBSD__)
const String & build_id_hex = DB::SymbolIndex::instance().getBuildIDHex();
#else
String build_id_hex{};
#endif
SentryWriter::onFault(sig, info, context, stack_trace, build_id_hex);
/// When everything is done, we will try to send these error messages to client. /// When everything is done, we will try to send these error messages to client.
if (thread_ptr) if (thread_ptr)
thread_ptr->onFatalError(); thread_ptr->onFatalError();
} }
}; };
@ -330,7 +342,7 @@ static void sanitizerDeathCallback()
std::stringstream bare_stacktrace; std::stringstream bare_stacktrace;
bare_stacktrace << "Stack trace:"; bare_stacktrace << "Stack trace:";
for (size_t i = stack_trace.getOffset(); i < stack_trace.getSize(); ++i) for (size_t i = stack_trace.getOffset(); i < stack_trace.getSize(); ++i)
bare_stacktrace << ' ' << stack_trace.getFrames()[i]; bare_stacktrace << ' ' << stack_trace.getFramePointers()[i];
LOG_FATAL(log, bare_stacktrace.str()); LOG_FATAL(log, bare_stacktrace.str());
} }
@ -529,6 +541,7 @@ void debugIncreaseOOMScore() {}
void BaseDaemon::initialize(Application & self) void BaseDaemon::initialize(Application & self)
{ {
closeFDs(); closeFDs();
task_manager = std::make_unique<Poco::TaskManager>(); task_manager = std::make_unique<Poco::TaskManager>();
ServerApplication::initialize(self); ServerApplication::initialize(self);
@ -536,7 +549,6 @@ void BaseDaemon::initialize(Application & self)
argsToConfig(argv(), config(), PRIO_APPLICATION - 100); argsToConfig(argv(), config(), PRIO_APPLICATION - 100);
bool is_daemon = config().getBool("application.runAsDaemon", false); bool is_daemon = config().getBool("application.runAsDaemon", false);
if (is_daemon) if (is_daemon)
{ {
/** When creating pid file and looking for config, will search for paths relative to the working path of the program when started. /** When creating pid file and looking for config, will search for paths relative to the working path of the program when started.
@ -672,6 +684,7 @@ void BaseDaemon::initialize(Application & self)
void BaseDaemon::initializeTerminationAndSignalProcessing() void BaseDaemon::initializeTerminationAndSignalProcessing()
{ {
SentryWriter::initialize(config());
std::set_terminate(terminate_handler); std::set_terminate(terminate_handler);
/// We want to avoid SIGPIPE when working with sockets and pipes, and just handle return value/errno instead. /// We want to avoid SIGPIPE when working with sockets and pipes, and just handle return value/errno instead.

View File

@ -1,7 +1,13 @@
add_library (daemon add_library (daemon
BaseDaemon.cpp BaseDaemon.cpp
GraphiteWriter.cpp GraphiteWriter.cpp
SentryWriter.cpp
) )
target_include_directories (daemon PUBLIC ..) target_include_directories (daemon PUBLIC ..)
target_link_libraries (daemon PUBLIC loggers PRIVATE clickhouse_common_io clickhouse_common_config common ${EXECINFO_LIBRARIES}) target_link_libraries (daemon PUBLIC loggers PRIVATE clickhouse_common_io clickhouse_common_config common ${EXECINFO_LIBRARIES})
if (USE_SENTRY)
target_link_libraries (daemon PRIVATE curl)
target_link_libraries (daemon PRIVATE ${SENTRY_LIBRARY})
endif ()

View File

@ -0,0 +1,250 @@
#include <daemon/SentryWriter.h>
#include <Poco/File.h>
#include <Poco/Util/Application.h>
#include <common/defines.h>
#include <common/getFQDNOrHostName.h>
#include <common/logger_useful.h>
#if !defined(ARCADIA_BUILD)
# include "Common/config_version.h"
# include <Common/config.h>
#endif
#if USE_SENTRY
# include <sentry.h> // Y_IGNORE
# include <stdio.h>
# include <filesystem>
#endif
#if USE_SENTRY
namespace
{
bool initialized = false;
bool anonymize = false;
void setExtras()
{
if (!anonymize)
{
sentry_set_extra("server_name", sentry_value_new_string(getFQDNOrHostName().c_str()));
}
sentry_set_tag("version", VERSION_STRING);
sentry_set_extra("version_githash", sentry_value_new_string(VERSION_GITHASH));
sentry_set_extra("version_describe", sentry_value_new_string(VERSION_DESCRIBE));
sentry_set_extra("version_integer", sentry_value_new_int32(VERSION_INTEGER));
sentry_set_extra("version_revision", sentry_value_new_int32(VERSION_REVISION));
sentry_set_extra("version_major", sentry_value_new_int32(VERSION_MAJOR));
sentry_set_extra("version_minor", sentry_value_new_int32(VERSION_MINOR));
sentry_set_extra("version_patch", sentry_value_new_int32(VERSION_PATCH));
}
void sentry_logger(sentry_level_t level, const char * message, va_list args)
{
auto * logger = &Poco::Logger::get("SentryWriter");
size_t size = 1024;
char buffer[size];
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wformat-nonliteral"
#endif
if (vsnprintf(buffer, size, message, args) >= 0)
{
#ifdef __clang__
#pragma clang diagnostic pop
#endif
switch (level)
{
case SENTRY_LEVEL_DEBUG:
logger->debug(buffer);
break;
case SENTRY_LEVEL_INFO:
logger->information(buffer);
break;
case SENTRY_LEVEL_WARNING:
logger->warning(buffer);
break;
case SENTRY_LEVEL_ERROR:
logger->error(buffer);
break;
case SENTRY_LEVEL_FATAL:
logger->fatal(buffer);
break;
}
}
}
}
#endif
void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config)
{
#if USE_SENTRY
bool enabled = false;
bool debug = config.getBool("send_crash_reports.debug", false);
auto * logger = &Poco::Logger::get("SentryWriter");
if (config.getBool("send_crash_reports.enabled", false))
{
if (debug || (strlen(VERSION_OFFICIAL) > 0))
{
enabled = true;
}
}
if (enabled)
{
const std::filesystem::path & default_tmp_path = std::filesystem::path(config.getString("tmp_path", Poco::Path::temp())) / "sentry";
const std::string & endpoint
= config.getString("send_crash_reports.endpoint");
const std::string & temp_folder_path
= config.getString("send_crash_reports.tmp_path", default_tmp_path);
Poco::File(temp_folder_path).createDirectories();
sentry_options_t * options = sentry_options_new(); /// will be freed by sentry_init or sentry_shutdown
sentry_options_set_release(options, VERSION_STRING_SHORT);
sentry_options_set_logger(options, &sentry_logger);
if (debug)
{
sentry_options_set_debug(options, 1);
}
sentry_options_set_dsn(options, endpoint.c_str());
sentry_options_set_database_path(options, temp_folder_path.c_str());
if (strstr(VERSION_DESCRIBE, "-stable") || strstr(VERSION_DESCRIBE, "-lts"))
{
sentry_options_set_environment(options, "prod");
}
else
{
sentry_options_set_environment(options, "test");
}
const std::string & http_proxy = config.getString("send_crash_reports.http_proxy", "");
if (!http_proxy.empty())
{
sentry_options_set_http_proxy(options, http_proxy.c_str());
}
int init_status = sentry_init(options);
if (!init_status)
{
initialized = true;
anonymize = config.getBool("send_crash_reports.anonymize", false);
LOG_INFO(
logger,
"Sending crash reports is initialized with {} endpoint and {} temp folder{}",
endpoint,
temp_folder_path,
anonymize ? " (anonymized)" : "");
}
else
{
LOG_WARNING(logger, "Sending crash reports failed to initialize with {} status", init_status);
}
}
else
{
LOG_INFO(logger, "Sending crash reports is disabled");
}
#else
UNUSED(config);
#endif
}
void SentryWriter::shutdown()
{
#if USE_SENTRY
if (initialized)
{
sentry_shutdown();
}
#endif
}
void SentryWriter::onFault(int sig, const siginfo_t & info, const ucontext_t & context, const StackTrace & stack_trace, const String & build_id_hex)
{
#if USE_SENTRY
auto * logger = &Poco::Logger::get("SentryWriter");
if (initialized)
{
const std::string & error_message = signalToErrorMessage(sig, info, context);
sentry_value_t event = sentry_value_new_message_event(SENTRY_LEVEL_FATAL, "fault", error_message.c_str());
sentry_set_tag("signal", strsignal(sig));
sentry_set_extra("signal_number", sentry_value_new_int32(sig));
if (!build_id_hex.empty())
{
sentry_set_tag("build_id", build_id_hex.c_str());
}
setExtras();
/// Prepare data for https://develop.sentry.dev/sdk/event-payloads/stacktrace/
sentry_value_t sentry_frames = sentry_value_new_list();
size_t stack_size = stack_trace.getSize();
if (stack_size > 0)
{
ssize_t offset = stack_trace.getOffset();
char instruction_addr[100];
StackTrace::Frames frames;
StackTrace::symbolize(stack_trace.getFramePointers(), offset, stack_size, frames);
for (ssize_t i = stack_size - 1; i >= offset; --i)
{
const StackTrace::Frame & current_frame = frames[i];
sentry_value_t sentry_frame = sentry_value_new_object();
UInt64 frame_ptr = reinterpret_cast<UInt64>(current_frame.virtual_addr);
if (std::snprintf(instruction_addr, sizeof(instruction_addr), "0x%" PRIx64, frame_ptr) >= 0)
{
sentry_value_set_by_key(sentry_frame, "instruction_addr", sentry_value_new_string(instruction_addr));
}
if (current_frame.symbol.has_value())
{
sentry_value_set_by_key(sentry_frame, "function", sentry_value_new_string(current_frame.symbol.value().c_str()));
}
if (current_frame.file.has_value())
{
sentry_value_set_by_key(sentry_frame, "filename", sentry_value_new_string(current_frame.file.value().c_str()));
}
if (current_frame.line.has_value())
{
sentry_value_set_by_key(sentry_frame, "lineno", sentry_value_new_int32(current_frame.line.value()));
}
sentry_value_append(sentry_frames, sentry_frame);
}
}
/// Prepare data for https://develop.sentry.dev/sdk/event-payloads/threads/
/// Stacktrace is filled only for a single thread that failed
sentry_value_t stacktrace = sentry_value_new_object();
sentry_value_set_by_key(stacktrace, "frames", sentry_frames);
sentry_value_t thread = sentry_value_new_object();
sentry_value_set_by_key(thread, "stacktrace", stacktrace);
sentry_value_t values = sentry_value_new_list();
sentry_value_append(values, thread);
sentry_value_t threads = sentry_value_new_object();
sentry_value_set_by_key(threads, "values", values);
sentry_value_set_by_key(event, "threads", threads);
LOG_INFO(logger, "Sending crash report");
sentry_capture_event(event);
shutdown();
}
else
{
LOG_INFO(logger, "Not sending crash report");
}
#else
UNUSED(sig);
UNUSED(info);
UNUSED(context);
UNUSED(stack_trace);
UNUSED(build_id_hex);
#endif
}

View File

@ -0,0 +1,33 @@
#pragma once
#include <common/types.h>
#include <Common/StackTrace.h>
#include <Poco/Util/LayeredConfiguration.h>
#include <string>
/// \brief Sends crash reports to ClickHouse core developer team via https://sentry.io
///
/// This feature can enabled with "send_crash_reports.enabled" server setting,
/// in this case reports are sent only for official ClickHouse builds.
///
/// It is possible to send those reports to your own sentry account or account of consulting company you hired
/// by overriding "send_crash_reports.endpoint" setting. "send_crash_reports.debug" setting will allow to do that for
class SentryWriter
{
public:
SentryWriter() = delete;
static void initialize(Poco::Util::LayeredConfiguration & config);
static void shutdown();
/// Not signal safe and can't be called from a signal handler
static void onFault(
int sig,
const siginfo_t & info,
const ucontext_t & context,
const StackTrace & stack_trace,
const String & build_id_hex
);
};

View File

@ -9,6 +9,7 @@ PEERDIR(
SRCS( SRCS(
BaseDaemon.cpp BaseDaemon.cpp
GraphiteWriter.cpp GraphiteWriter.cpp
SentryWriter.cpp
) )
END() END()

21
cmake/find/sentry.cmake Normal file
View File

@ -0,0 +1,21 @@
set (SENTRY_LIBRARY "sentry")
set (SENTRY_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/sentry-native/include")
if (NOT EXISTS "${SENTRY_INCLUDE_DIR}/sentry.h")
message (WARNING "submodule contrib/sentry-native is missing. to fix try run: \n git submodule update --init --recursive")
return()
endif ()
if (NOT OS_FREEBSD AND NOT SPLIT_SHARED_LIBRARIES AND NOT_UNBUNDLED AND NOT (OS_DARWIN AND COMPILER_CLANG))
option (USE_SENTRY "Use Sentry" ON)
set (CURL_LIBRARY ${ClickHouse_SOURCE_DIR}/contrib/curl/lib)
set (CURL_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/curl/include)
set (SENTRY_TRANSPORT "curl" CACHE STRING "")
set (SENTRY_BACKEND "none" CACHE STRING "")
set (SENTRY_EXPORT_SYMBOLS OFF CACHE BOOL "")
set (SENTRY_LINK_PTHREAD OFF CACHE BOOL "")
set (SENTRY_PIC OFF CACHE BOOL "")
set (BUILD_SHARED_LIBS OFF)
message (STATUS "Using sentry=${USE_SENTRY}: ${SENTRY_LIBRARY}")
include_directories("${SENTRY_INCLUDE_DIR}")
endif ()

View File

@ -14,6 +14,7 @@ endif ()
set (VERSION_NAME "${PROJECT_NAME}") set (VERSION_NAME "${PROJECT_NAME}")
set (VERSION_FULL "${VERSION_NAME} ${VERSION_STRING}") set (VERSION_FULL "${VERSION_NAME} ${VERSION_STRING}")
set (VERSION_SO "${VERSION_STRING}") set (VERSION_SO "${VERSION_STRING}")
set (VERSION_STRING_SHORT "${VERSION_MAJOR}.${VERSION_MINOR}")
math (EXPR VERSION_INTEGER "${VERSION_PATCH} + ${VERSION_MINOR}*1000 + ${VERSION_MAJOR}*1000000") math (EXPR VERSION_INTEGER "${VERSION_PATCH} + ${VERSION_MINOR}*1000 + ${VERSION_MAJOR}*1000000")

View File

@ -263,7 +263,7 @@ if (USE_INTERNAL_GRPC_LIBRARY)
add_subdirectory(grpc-cmake) add_subdirectory(grpc-cmake)
endif () endif ()
if (USE_INTERNAL_AWS_S3_LIBRARY) if (USE_INTERNAL_AWS_S3_LIBRARY OR USE_SENTRY)
set (save_CMAKE_C_FLAGS ${CMAKE_C_FLAGS}) set (save_CMAKE_C_FLAGS ${CMAKE_C_FLAGS})
set (save_CMAKE_REQUIRED_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES}) set (save_CMAKE_REQUIRED_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES})
set (save_CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES}) set (save_CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES})
@ -275,12 +275,18 @@ if (USE_INTERNAL_AWS_S3_LIBRARY)
set (CMAKE_CMAKE_REQUIRED_INCLUDES ${save_CMAKE_REQUIRED_INCLUDES}) set (CMAKE_CMAKE_REQUIRED_INCLUDES ${save_CMAKE_REQUIRED_INCLUDES})
set (CMAKE_REQUIRED_FLAGS ${save_CMAKE_REQUIRED_FLAGS}) set (CMAKE_REQUIRED_FLAGS ${save_CMAKE_REQUIRED_FLAGS})
set (CMAKE_CMAKE_MODULE_PATH ${save_CMAKE_MODULE_PATH}) set (CMAKE_CMAKE_MODULE_PATH ${save_CMAKE_MODULE_PATH})
# The library is large - avoid bloat.
target_compile_options (curl PRIVATE -g0)
endif ()
if (USE_INTERNAL_AWS_S3_LIBRARY)
add_subdirectory(aws-s3-cmake) add_subdirectory(aws-s3-cmake)
# The library is large - avoid bloat. # The library is large - avoid bloat.
target_compile_options (aws_s3 PRIVATE -g0) target_compile_options (aws_s3 PRIVATE -g0)
target_compile_options (aws_s3_checksums PRIVATE -g0) target_compile_options (aws_s3_checksums PRIVATE -g0)
target_compile_options (curl PRIVATE -g0)
endif () endif ()
if (USE_BASE64) if (USE_BASE64)
@ -300,5 +306,9 @@ if (USE_CASSANDRA)
add_subdirectory (cassandra) add_subdirectory (cassandra)
endif() endif()
if (USE_SENTRY)
add_subdirectory (sentry-native)
endif()
add_subdirectory (fmtlib-cmake) add_subdirectory (fmtlib-cmake)

2
contrib/avro vendored

@ -1 +1 @@
Subproject commit 6cfcf6c24293af100d523b89b61d1ab216fa4735 Subproject commit 92caca2d42fc9a97e34e95f963593539d32ed331

View File

@ -1,4 +1,6 @@
set (CURL_DIR ${ClickHouse_SOURCE_DIR}/contrib/curl) set (CURL_DIR ${ClickHouse_SOURCE_DIR}/contrib/curl)
set (CURL_LIBRARY ${ClickHouse_SOURCE_DIR}/contrib/curl/lib)
set (CURL_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/curl/include)
set (SRCS set (SRCS
${CURL_DIR}/lib/file.c ${CURL_DIR}/lib/file.c

View File

@ -1,14 +1,19 @@
option (ENABLE_JEMALLOC "Enable jemalloc allocator" ${ENABLE_LIBRARIES}) option (ENABLE_JEMALLOC "Enable jemalloc allocator" ${ENABLE_LIBRARIES})
if (SANITIZE OR NOT OS_LINUX OR NOT (ARCH_AMD64 OR ARCH_ARM)) if (SANITIZE OR NOT (ARCH_AMD64 OR ARCH_ARM) OR NOT (OS_LINUX OR OS_FREEBSD OR OS_DARWIN))
set (ENABLE_JEMALLOC OFF) set (ENABLE_JEMALLOC OFF)
message (STATUS "jemalloc is disabled implicitly: it doesn't work with sanitizers and can only be used on Linux with x86_64 or aarch64.") message (STATUS "jemalloc is disabled implicitly: it doesn't work with sanitizers and can only be used with x86_64 or aarch64 on linux or freebsd.")
endif () endif ()
if (ENABLE_JEMALLOC) if (ENABLE_JEMALLOC)
if (NOT OS_LINUX)
message (WARNING "jemalloc support on non-linux is EXPERIMENTAL")
endif()
option (USE_INTERNAL_JEMALLOC "Use internal jemalloc library" ${NOT_UNBUNDLED}) option (USE_INTERNAL_JEMALLOC "Use internal jemalloc library" ${NOT_UNBUNDLED})
if (USE_INTERNAL_JEMALLOC) if (USE_INTERNAL_JEMALLOC)
if (OS_LINUX)
# ThreadPool select job randomly, and there can be some threads that had been # ThreadPool select job randomly, and there can be some threads that had been
# performed some memory heavy task before and will be inactive for some time, # performed some memory heavy task before and will be inactive for some time,
# but until it will became active again, the memory will not be freed since by # but until it will became active again, the memory will not be freed since by
@ -18,6 +23,9 @@ if (ENABLE_JEMALLOC)
# By enabling percpu_arena number of arenas limited to number of CPUs and hence # By enabling percpu_arena number of arenas limited to number of CPUs and hence
# this problem should go away. # this problem should go away.
set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0") set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0")
else()
set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0")
endif()
# CACHE variable is empty, to allow changing defaults without necessity # CACHE variable is empty, to allow changing defaults without necessity
# to purge cache # to purge cache
set (JEMALLOC_CONFIG_MALLOC_CONF_OVERRIDE "" CACHE STRING "Change default configuration string of JEMalloc" ) set (JEMALLOC_CONFIG_MALLOC_CONF_OVERRIDE "" CACHE STRING "Change default configuration string of JEMalloc" )
@ -71,14 +79,26 @@ if (ENABLE_JEMALLOC)
target_include_directories(jemalloc PRIVATE ${LIBRARY_DIR}/include) target_include_directories(jemalloc PRIVATE ${LIBRARY_DIR}/include)
target_include_directories(jemalloc SYSTEM PUBLIC include) target_include_directories(jemalloc SYSTEM PUBLIC include)
set(JEMALLOC_INCLUDE) set (JEMALLOC_INCLUDE_PREFIX)
if (ARCH_AMD64) # OS_
set(JEMALLOC_INCLUDE_PREFIX include_linux_x86_64) if (OS_LINUX)
elseif (ARCH_ARM) set (JEMALLOC_INCLUDE_PREFIX "include_linux")
set(JEMALLOC_INCLUDE_PREFIX include_linux_aarch64) elseif (OS_FREEBSD)
set (JEMALLOC_INCLUDE_PREFIX "include_freebsd")
elseif (OS_DARWIN)
set (JEMALLOC_INCLUDE_PREFIX "include_darwin")
else ()
message (FATAL_ERROR "This OS is not supported")
endif () endif ()
target_include_directories(jemalloc SYSTEM PUBLIC # ARCH_
${JEMALLOC_INCLUDE_PREFIX}) if (ARCH_AMD64)
set(JEMALLOC_INCLUDE_PREFIX "${JEMALLOC_INCLUDE_PREFIX}_x86_64")
elseif (ARCH_ARM)
set(JEMALLOC_INCLUDE_PREFIX "${JEMALLOC_INCLUDE_PREFIX}_aarch64")
else ()
message (FATAL_ERROR "This arch is not supported")
endif ()
configure_file(${JEMALLOC_INCLUDE_PREFIX}/jemalloc/internal/jemalloc_internal_defs.h.in configure_file(${JEMALLOC_INCLUDE_PREFIX}/jemalloc/internal/jemalloc_internal_defs.h.in
${JEMALLOC_INCLUDE_PREFIX}/jemalloc/internal/jemalloc_internal_defs.h) ${JEMALLOC_INCLUDE_PREFIX}/jemalloc/internal/jemalloc_internal_defs.h)
target_include_directories(jemalloc SYSTEM PRIVATE target_include_directories(jemalloc SYSTEM PRIVATE
@ -128,6 +148,10 @@ if (ENABLE_JEMALLOC)
endif () endif ()
set_property(TARGET jemalloc APPEND PROPERTY INTERFACE_COMPILE_DEFINITIONS USE_JEMALLOC=1) set_property(TARGET jemalloc APPEND PROPERTY INTERFACE_COMPILE_DEFINITIONS USE_JEMALLOC=1)
if (MAKE_STATIC_LIBRARIES)
# To detect whether we need to register jemalloc for osx as default zone.
set_property(TARGET jemalloc APPEND PROPERTY INTERFACE_COMPILE_DEFINITIONS BUNDLED_STATIC_JEMALLOC=1)
endif()
message (STATUS "Using jemalloc") message (STATUS "Using jemalloc")
else () else ()

View File

@ -1,3 +1,13 @@
// OSX does not have this for system alloc functions, so you will get
// "exception specification in declaration" error.
#if defined(__APPLE__) || defined(__FreeBSD__)
# undef JEMALLOC_NOTHROW
# define JEMALLOC_NOTHROW
# undef JEMALLOC_CXX_THROW
# define JEMALLOC_CXX_THROW
#endif
/* /*
* The je_ prefix on the following public symbol declarations is an artifact * The je_ prefix on the following public symbol declarations is an artifact
* of namespace management, and should be omitted in application code unless * of namespace management, and should be omitted in application code unless

View File

@ -0,0 +1,372 @@
/* include/jemalloc/internal/jemalloc_internal_defs.h. Generated from jemalloc_internal_defs.h.in by configure. */
#ifndef JEMALLOC_INTERNAL_DEFS_H_
#define JEMALLOC_INTERNAL_DEFS_H_
/*
* If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all
* public APIs to be prefixed. This makes it possible, with some care, to use
* multiple allocators simultaneously.
*/
#define JEMALLOC_PREFIX "je_"
#define JEMALLOC_CPREFIX "JE_"
/*
* Define overrides for non-standard allocator-related functions if they are
* present on the system.
*/
/* #undef JEMALLOC_OVERRIDE___LIBC_CALLOC */
/* #undef JEMALLOC_OVERRIDE___LIBC_FREE */
/* #undef JEMALLOC_OVERRIDE___LIBC_MALLOC */
/* #undef JEMALLOC_OVERRIDE___LIBC_MEMALIGN */
/* #undef JEMALLOC_OVERRIDE___LIBC_REALLOC */
/* #undef JEMALLOC_OVERRIDE___LIBC_VALLOC */
/* #undef JEMALLOC_OVERRIDE___POSIX_MEMALIGN */
/*
* JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs.
* For shared libraries, symbol visibility mechanisms prevent these symbols
* from being exported, but for static libraries, naming collisions are a real
* possibility.
*/
#define JEMALLOC_PRIVATE_NAMESPACE je_
/*
* Hyper-threaded CPUs may need a special instruction inside spin loops in
* order to yield to another virtual CPU.
*/
#define CPU_SPINWAIT
/* 1 if CPU_SPINWAIT is defined, 0 otherwise. */
#define HAVE_CPU_SPINWAIT 0
/*
* Number of significant bits in virtual addresses. This may be less than the
* total number of bits in a pointer, e.g. on x64, for which the uppermost 16
* bits are the same as bit 47.
*/
#define LG_VADDR 48
/* Defined if C11 atomics are available. */
#define JEMALLOC_C11_ATOMICS 1
/* Defined if GCC __atomic atomics are available. */
#define JEMALLOC_GCC_ATOMIC_ATOMICS 1
/* and the 8-bit variant support. */
#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS 1
/* Defined if GCC __sync atomics are available. */
#define JEMALLOC_GCC_SYNC_ATOMICS 1
/* and the 8-bit variant support. */
#define JEMALLOC_GCC_U8_SYNC_ATOMICS 1
/*
* Defined if __builtin_clz() and __builtin_clzl() are available.
*/
#define JEMALLOC_HAVE_BUILTIN_CLZ
/*
* Defined if os_unfair_lock_*() functions are available, as provided by Darwin.
*/
#define JEMALLOC_OS_UNFAIR_LOCK
/* Defined if syscall(2) is usable. */
/* #undef JEMALLOC_USE_SYSCALL */
/*
* Defined if secure_getenv(3) is available.
*/
/* #undef JEMALLOC_HAVE_SECURE_GETENV */
/*
* Defined if issetugid(2) is available.
*/
#define JEMALLOC_HAVE_ISSETUGID
/* Defined if pthread_atfork(3) is available. */
#define JEMALLOC_HAVE_PTHREAD_ATFORK
/* Defined if pthread_setname_np(3) is available. */
/* #undef JEMALLOC_HAVE_PTHREAD_SETNAME_NP */
/*
* Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available.
*/
/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE */
/*
* Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available.
*/
/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC */
/*
* Defined if mach_absolute_time() is available.
*/
#define JEMALLOC_HAVE_MACH_ABSOLUTE_TIME 1
/*
* Defined if clock_gettime(CLOCK_REALTIME, ...) is available.
*/
#define JEMALLOC_HAVE_CLOCK_REALTIME 1
/*
* Defined if _malloc_thread_cleanup() exists. At least in the case of
* FreeBSD, pthread_key_create() allocates, which if used during malloc
* bootstrapping will cause recursion into the pthreads library. Therefore, if
* _malloc_thread_cleanup() exists, use it as the basis for thread cleanup in
* malloc_tsd.
*/
/* #undef JEMALLOC_MALLOC_THREAD_CLEANUP */
/*
* Defined if threaded initialization is known to be safe on this platform.
* Among other things, it must be possible to initialize a mutex without
* triggering allocation in order for threaded allocation to be safe.
*/
/* #undef JEMALLOC_THREADED_INIT */
/*
* Defined if the pthreads implementation defines
* _pthread_mutex_init_calloc_cb(), in which case the function is used in order
* to avoid recursive allocation during mutex initialization.
*/
/* #undef JEMALLOC_MUTEX_INIT_CB */
/* Non-empty if the tls_model attribute is supported. */
#define JEMALLOC_TLS_MODEL __attribute__((tls_model("initial-exec")))
/*
* JEMALLOC_DEBUG enables assertions and other sanity checks, and disables
* inline functions.
*/
/* #undef JEMALLOC_DEBUG */
/* JEMALLOC_STATS enables statistics calculation. */
#define JEMALLOC_STATS
/* JEMALLOC_EXPERIMENTAL_SMALLOCX_API enables experimental smallocx API. */
/* #undef JEMALLOC_EXPERIMENTAL_SMALLOCX_API */
/* JEMALLOC_PROF enables allocation profiling. */
/* #undef JEMALLOC_PROF */
/* Use libunwind for profile backtracing if defined. */
/* #undef JEMALLOC_PROF_LIBUNWIND */
/* Use libgcc for profile backtracing if defined. */
/* #undef JEMALLOC_PROF_LIBGCC */
/* Use gcc intrinsics for profile backtracing if defined. */
/* #undef JEMALLOC_PROF_GCC */
/*
* JEMALLOC_DSS enables use of sbrk(2) to allocate extents from the data storage
* segment (DSS).
*/
/* #undef JEMALLOC_DSS */
/* Support memory filling (junk/zero). */
#define JEMALLOC_FILL
/* Support utrace(2)-based tracing. */
/* #undef JEMALLOC_UTRACE */
/* Support optional abort() on OOM. */
/* #undef JEMALLOC_XMALLOC */
/* Support lazy locking (avoid locking unless a second thread is launched). */
/* #undef JEMALLOC_LAZY_LOCK */
/*
* Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size
* classes).
*/
/* #undef LG_QUANTUM */
/* One page is 2^LG_PAGE bytes. */
#define LG_PAGE 16
/*
* One huge page is 2^LG_HUGEPAGE bytes. Note that this is defined even if the
* system does not explicitly support huge pages; system calls that require
* explicit huge page support are separately configured.
*/
#define LG_HUGEPAGE 29
/*
* If defined, adjacent virtual memory mappings with identical attributes
* automatically coalesce, and they fragment when changes are made to subranges.
* This is the normal order of things for mmap()/munmap(), but on Windows
* VirtualAlloc()/VirtualFree() operations must be precisely matched, i.e.
* mappings do *not* coalesce/fragment.
*/
#define JEMALLOC_MAPS_COALESCE
/*
* If defined, retain memory for later reuse by default rather than using e.g.
* munmap() to unmap freed extents. This is enabled on 64-bit Linux because
* common sequences of mmap()/munmap() calls will cause virtual memory map
* holes.
*/
/* #undef JEMALLOC_RETAIN */
/* TLS is used to map arenas and magazine caches to threads. */
/* #undef JEMALLOC_TLS */
/*
* Used to mark unreachable code to quiet "end of non-void" compiler warnings.
* Don't use this directly; instead use unreachable() from util.h
*/
#define JEMALLOC_INTERNAL_UNREACHABLE __builtin_unreachable
/*
* ffs*() functions to use for bitmapping. Don't use these directly; instead,
* use ffs_*() from util.h.
*/
#define JEMALLOC_INTERNAL_FFSLL __builtin_ffsll
#define JEMALLOC_INTERNAL_FFSL __builtin_ffsl
#define JEMALLOC_INTERNAL_FFS __builtin_ffs
/*
* popcount*() functions to use for bitmapping.
*/
#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl
#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount
/*
* If defined, explicitly attempt to more uniformly distribute large allocation
* pointer alignments across all cache indices.
*/
#define JEMALLOC_CACHE_OBLIVIOUS
/*
* If defined, enable logging facilities. We make this a configure option to
* avoid taking extra branches everywhere.
*/
/* #undef JEMALLOC_LOG */
/*
* If defined, use readlinkat() (instead of readlink()) to follow
* /etc/malloc_conf.
*/
/* #undef JEMALLOC_READLINKAT */
/*
* Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings.
*/
#define JEMALLOC_ZONE
/*
* Methods for determining whether the OS overcommits.
* JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY: Linux's
* /proc/sys/vm.overcommit_memory file.
* JEMALLOC_SYSCTL_VM_OVERCOMMIT: FreeBSD's vm.overcommit sysctl.
*/
/* #undef JEMALLOC_SYSCTL_VM_OVERCOMMIT */
/* #undef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY */
/* Defined if madvise(2) is available. */
#define JEMALLOC_HAVE_MADVISE
/*
* Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE
* arguments to madvise(2).
*/
/* #undef JEMALLOC_HAVE_MADVISE_HUGE */
/*
* Methods for purging unused pages differ between operating systems.
*
* madvise(..., MADV_FREE) : This marks pages as being unused, such that they
* will be discarded rather than swapped out.
* madvise(..., MADV_DONTNEED) : If JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS is
* defined, this immediately discards pages,
* such that new pages will be demand-zeroed if
* the address region is later touched;
* otherwise this behaves similarly to
* MADV_FREE, though typically with higher
* system overhead.
*/
#define JEMALLOC_PURGE_MADVISE_FREE
#define JEMALLOC_PURGE_MADVISE_DONTNEED
/* #undef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS */
/* Defined if madvise(2) is available but MADV_FREE is not (x86 Linux only). */
/* #undef JEMALLOC_DEFINE_MADVISE_FREE */
/*
* Defined if MADV_DO[NT]DUMP is supported as an argument to madvise.
*/
/* #undef JEMALLOC_MADVISE_DONTDUMP */
/*
* Defined if transparent huge pages (THPs) are supported via the
* MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled.
*/
/* #undef JEMALLOC_THP */
/* Define if operating system has alloca.h header. */
/* #undef JEMALLOC_HAS_ALLOCA_H */
/* C99 restrict keyword supported. */
#define JEMALLOC_HAS_RESTRICT 1
/* For use by hash code. */
/* #undef JEMALLOC_BIG_ENDIAN */
/* sizeof(int) == 2^LG_SIZEOF_INT. */
#define LG_SIZEOF_INT 2
/* sizeof(long) == 2^LG_SIZEOF_LONG. */
#define LG_SIZEOF_LONG 3
/* sizeof(long long) == 2^LG_SIZEOF_LONG_LONG. */
#define LG_SIZEOF_LONG_LONG 3
/* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */
#define LG_SIZEOF_INTMAX_T 3
/* glibc malloc hooks (__malloc_hook, __realloc_hook, __free_hook). */
/* #undef JEMALLOC_GLIBC_MALLOC_HOOK */
/* glibc memalign hook. */
/* #undef JEMALLOC_GLIBC_MEMALIGN_HOOK */
/* pthread support */
#define JEMALLOC_HAVE_PTHREAD
/* dlsym() support */
#define JEMALLOC_HAVE_DLSYM
/* Adaptive mutex support in pthreads. */
/* #undef JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP */
/* GNU specific sched_getcpu support */
/* #undef JEMALLOC_HAVE_SCHED_GETCPU */
/* GNU specific sched_setaffinity support */
/* #undef JEMALLOC_HAVE_SCHED_SETAFFINITY */
/*
* If defined, all the features necessary for background threads are present.
*/
/* #undef JEMALLOC_BACKGROUND_THREAD */
/*
* If defined, jemalloc symbols are not exported (doesn't work when
* JEMALLOC_PREFIX is not defined).
*/
/* #undef JEMALLOC_EXPORT */
/* config.malloc_conf options string. */
#define JEMALLOC_CONFIG_MALLOC_CONF "@JEMALLOC_CONFIG_MALLOC_CONF@"
/* If defined, jemalloc takes the malloc/free/etc. symbol names. */
/* #undef JEMALLOC_IS_MALLOC */
/*
* Defined if strerror_r returns char * if _GNU_SOURCE is defined.
*/
/* #undef JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE */
/* Performs additional safety checks when defined. */
/* #undef JEMALLOC_OPT_SAFETY_CHECKS */
#endif /* JEMALLOC_INTERNAL_DEFS_H_ */

View File

@ -0,0 +1,372 @@
/* include/jemalloc/internal/jemalloc_internal_defs.h. Generated from jemalloc_internal_defs.h.in by configure. */
#ifndef JEMALLOC_INTERNAL_DEFS_H_
#define JEMALLOC_INTERNAL_DEFS_H_
/*
* If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all
* public APIs to be prefixed. This makes it possible, with some care, to use
* multiple allocators simultaneously.
*/
#define JEMALLOC_PREFIX "je_"
#define JEMALLOC_CPREFIX "JE_"
/*
* Define overrides for non-standard allocator-related functions if they are
* present on the system.
*/
/* #undef JEMALLOC_OVERRIDE___LIBC_CALLOC */
/* #undef JEMALLOC_OVERRIDE___LIBC_FREE */
/* #undef JEMALLOC_OVERRIDE___LIBC_MALLOC */
/* #undef JEMALLOC_OVERRIDE___LIBC_MEMALIGN */
/* #undef JEMALLOC_OVERRIDE___LIBC_REALLOC */
/* #undef JEMALLOC_OVERRIDE___LIBC_VALLOC */
/* #undef JEMALLOC_OVERRIDE___POSIX_MEMALIGN */
/*
* JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs.
* For shared libraries, symbol visibility mechanisms prevent these symbols
* from being exported, but for static libraries, naming collisions are a real
* possibility.
*/
#define JEMALLOC_PRIVATE_NAMESPACE je_
/*
* Hyper-threaded CPUs may need a special instruction inside spin loops in
* order to yield to another virtual CPU.
*/
#define CPU_SPINWAIT __asm__ volatile("pause")
/* 1 if CPU_SPINWAIT is defined, 0 otherwise. */
#define HAVE_CPU_SPINWAIT 1
/*
* Number of significant bits in virtual addresses. This may be less than the
* total number of bits in a pointer, e.g. on x64, for which the uppermost 16
* bits are the same as bit 47.
*/
#define LG_VADDR 48
/* Defined if C11 atomics are available. */
#define JEMALLOC_C11_ATOMICS 1
/* Defined if GCC __atomic atomics are available. */
#define JEMALLOC_GCC_ATOMIC_ATOMICS 1
/* and the 8-bit variant support. */
#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS 1
/* Defined if GCC __sync atomics are available. */
#define JEMALLOC_GCC_SYNC_ATOMICS 1
/* and the 8-bit variant support. */
#define JEMALLOC_GCC_U8_SYNC_ATOMICS 1
/*
* Defined if __builtin_clz() and __builtin_clzl() are available.
*/
#define JEMALLOC_HAVE_BUILTIN_CLZ
/*
* Defined if os_unfair_lock_*() functions are available, as provided by Darwin.
*/
#define JEMALLOC_OS_UNFAIR_LOCK
/* Defined if syscall(2) is usable. */
/* #undef JEMALLOC_USE_SYSCALL */
/*
* Defined if secure_getenv(3) is available.
*/
/* #undef JEMALLOC_HAVE_SECURE_GETENV */
/*
* Defined if issetugid(2) is available.
*/
#define JEMALLOC_HAVE_ISSETUGID
/* Defined if pthread_atfork(3) is available. */
#define JEMALLOC_HAVE_PTHREAD_ATFORK
/* Defined if pthread_setname_np(3) is available. */
/* #undef JEMALLOC_HAVE_PTHREAD_SETNAME_NP */
/*
* Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available.
*/
/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE */
/*
* Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available.
*/
/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC */
/*
* Defined if mach_absolute_time() is available.
*/
#define JEMALLOC_HAVE_MACH_ABSOLUTE_TIME 1
/*
* Defined if clock_gettime(CLOCK_REALTIME, ...) is available.
*/
#define JEMALLOC_HAVE_CLOCK_REALTIME 1
/*
* Defined if _malloc_thread_cleanup() exists. At least in the case of
* FreeBSD, pthread_key_create() allocates, which if used during malloc
* bootstrapping will cause recursion into the pthreads library. Therefore, if
* _malloc_thread_cleanup() exists, use it as the basis for thread cleanup in
* malloc_tsd.
*/
/* #undef JEMALLOC_MALLOC_THREAD_CLEANUP */
/*
* Defined if threaded initialization is known to be safe on this platform.
* Among other things, it must be possible to initialize a mutex without
* triggering allocation in order for threaded allocation to be safe.
*/
/* #undef JEMALLOC_THREADED_INIT */
/*
* Defined if the pthreads implementation defines
* _pthread_mutex_init_calloc_cb(), in which case the function is used in order
* to avoid recursive allocation during mutex initialization.
*/
/* #undef JEMALLOC_MUTEX_INIT_CB */
/* Non-empty if the tls_model attribute is supported. */
#define JEMALLOC_TLS_MODEL __attribute__((tls_model("initial-exec")))
/*
* JEMALLOC_DEBUG enables assertions and other sanity checks, and disables
* inline functions.
*/
/* #undef JEMALLOC_DEBUG */
/* JEMALLOC_STATS enables statistics calculation. */
#define JEMALLOC_STATS
/* JEMALLOC_EXPERIMENTAL_SMALLOCX_API enables experimental smallocx API. */
/* #undef JEMALLOC_EXPERIMENTAL_SMALLOCX_API */
/* JEMALLOC_PROF enables allocation profiling. */
/* #undef JEMALLOC_PROF */
/* Use libunwind for profile backtracing if defined. */
/* #undef JEMALLOC_PROF_LIBUNWIND */
/* Use libgcc for profile backtracing if defined. */
/* #undef JEMALLOC_PROF_LIBGCC */
/* Use gcc intrinsics for profile backtracing if defined. */
/* #undef JEMALLOC_PROF_GCC */
/*
* JEMALLOC_DSS enables use of sbrk(2) to allocate extents from the data storage
* segment (DSS).
*/
/* #undef JEMALLOC_DSS */
/* Support memory filling (junk/zero). */
#define JEMALLOC_FILL
/* Support utrace(2)-based tracing. */
/* #undef JEMALLOC_UTRACE */
/* Support optional abort() on OOM. */
/* #undef JEMALLOC_XMALLOC */
/* Support lazy locking (avoid locking unless a second thread is launched). */
/* #undef JEMALLOC_LAZY_LOCK */
/*
* Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size
* classes).
*/
/* #undef LG_QUANTUM */
/* One page is 2^LG_PAGE bytes. */
#define LG_PAGE 12
/*
* One huge page is 2^LG_HUGEPAGE bytes. Note that this is defined even if the
* system does not explicitly support huge pages; system calls that require
* explicit huge page support are separately configured.
*/
#define LG_HUGEPAGE 21
/*
* If defined, adjacent virtual memory mappings with identical attributes
* automatically coalesce, and they fragment when changes are made to subranges.
* This is the normal order of things for mmap()/munmap(), but on Windows
* VirtualAlloc()/VirtualFree() operations must be precisely matched, i.e.
* mappings do *not* coalesce/fragment.
*/
#define JEMALLOC_MAPS_COALESCE
/*
* If defined, retain memory for later reuse by default rather than using e.g.
* munmap() to unmap freed extents. This is enabled on 64-bit Linux because
* common sequences of mmap()/munmap() calls will cause virtual memory map
* holes.
*/
/* #undef JEMALLOC_RETAIN */
/* TLS is used to map arenas and magazine caches to threads. */
/* #undef JEMALLOC_TLS */
/*
* Used to mark unreachable code to quiet "end of non-void" compiler warnings.
* Don't use this directly; instead use unreachable() from util.h
*/
#define JEMALLOC_INTERNAL_UNREACHABLE __builtin_unreachable
/*
* ffs*() functions to use for bitmapping. Don't use these directly; instead,
* use ffs_*() from util.h.
*/
#define JEMALLOC_INTERNAL_FFSLL __builtin_ffsll
#define JEMALLOC_INTERNAL_FFSL __builtin_ffsl
#define JEMALLOC_INTERNAL_FFS __builtin_ffs
/*
* popcount*() functions to use for bitmapping.
*/
#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl
#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount
/*
* If defined, explicitly attempt to more uniformly distribute large allocation
* pointer alignments across all cache indices.
*/
#define JEMALLOC_CACHE_OBLIVIOUS
/*
* If defined, enable logging facilities. We make this a configure option to
* avoid taking extra branches everywhere.
*/
/* #undef JEMALLOC_LOG */
/*
* If defined, use readlinkat() (instead of readlink()) to follow
* /etc/malloc_conf.
*/
/* #undef JEMALLOC_READLINKAT */
/*
* Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings.
*/
#define JEMALLOC_ZONE
/*
* Methods for determining whether the OS overcommits.
* JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY: Linux's
* /proc/sys/vm.overcommit_memory file.
* JEMALLOC_SYSCTL_VM_OVERCOMMIT: FreeBSD's vm.overcommit sysctl.
*/
/* #undef JEMALLOC_SYSCTL_VM_OVERCOMMIT */
/* #undef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY */
/* Defined if madvise(2) is available. */
#define JEMALLOC_HAVE_MADVISE
/*
* Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE
* arguments to madvise(2).
*/
/* #undef JEMALLOC_HAVE_MADVISE_HUGE */
/*
* Methods for purging unused pages differ between operating systems.
*
* madvise(..., MADV_FREE) : This marks pages as being unused, such that they
* will be discarded rather than swapped out.
* madvise(..., MADV_DONTNEED) : If JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS is
* defined, this immediately discards pages,
* such that new pages will be demand-zeroed if
* the address region is later touched;
* otherwise this behaves similarly to
* MADV_FREE, though typically with higher
* system overhead.
*/
#define JEMALLOC_PURGE_MADVISE_FREE
#define JEMALLOC_PURGE_MADVISE_DONTNEED
/* #undef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS */
/* Defined if madvise(2) is available but MADV_FREE is not (x86 Linux only). */
/* #undef JEMALLOC_DEFINE_MADVISE_FREE */
/*
* Defined if MADV_DO[NT]DUMP is supported as an argument to madvise.
*/
/* #undef JEMALLOC_MADVISE_DONTDUMP */
/*
* Defined if transparent huge pages (THPs) are supported via the
* MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled.
*/
/* #undef JEMALLOC_THP */
/* Define if operating system has alloca.h header. */
/* #undef JEMALLOC_HAS_ALLOCA_H */
/* C99 restrict keyword supported. */
#define JEMALLOC_HAS_RESTRICT 1
/* For use by hash code. */
/* #undef JEMALLOC_BIG_ENDIAN */
/* sizeof(int) == 2^LG_SIZEOF_INT. */
#define LG_SIZEOF_INT 2
/* sizeof(long) == 2^LG_SIZEOF_LONG. */
#define LG_SIZEOF_LONG 3
/* sizeof(long long) == 2^LG_SIZEOF_LONG_LONG. */
#define LG_SIZEOF_LONG_LONG 3
/* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */
#define LG_SIZEOF_INTMAX_T 3
/* glibc malloc hooks (__malloc_hook, __realloc_hook, __free_hook). */
/* #undef JEMALLOC_GLIBC_MALLOC_HOOK */
/* glibc memalign hook. */
/* #undef JEMALLOC_GLIBC_MEMALIGN_HOOK */
/* pthread support */
#define JEMALLOC_HAVE_PTHREAD
/* dlsym() support */
#define JEMALLOC_HAVE_DLSYM
/* Adaptive mutex support in pthreads. */
/* #undef JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP */
/* GNU specific sched_getcpu support */
/* #undef JEMALLOC_HAVE_SCHED_GETCPU */
/* GNU specific sched_setaffinity support */
/* #undef JEMALLOC_HAVE_SCHED_SETAFFINITY */
/*
* If defined, all the features necessary for background threads are present.
*/
/* #undef JEMALLOC_BACKGROUND_THREAD */
/*
* If defined, jemalloc symbols are not exported (doesn't work when
* JEMALLOC_PREFIX is not defined).
*/
/* #undef JEMALLOC_EXPORT */
/* config.malloc_conf options string. */
#define JEMALLOC_CONFIG_MALLOC_CONF "@JEMALLOC_CONFIG_MALLOC_CONF@"
/* If defined, jemalloc takes the malloc/free/etc. symbol names. */
/* #undef JEMALLOC_IS_MALLOC */
/*
* Defined if strerror_r returns char * if _GNU_SOURCE is defined.
*/
/* #undef JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE */
/* Performs additional safety checks when defined. */
/* #undef JEMALLOC_OPT_SAFETY_CHECKS */
#endif /* JEMALLOC_INTERNAL_DEFS_H_ */

View File

@ -0,0 +1,373 @@
/* include/jemalloc/internal/jemalloc_internal_defs.h. Generated from jemalloc_internal_defs.h.in by configure. */
#ifndef JEMALLOC_INTERNAL_DEFS_H_
#define JEMALLOC_INTERNAL_DEFS_H_
/*
* If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all
* public APIs to be prefixed. This makes it possible, with some care, to use
* multiple allocators simultaneously.
*/
/* #undef JEMALLOC_PREFIX */
/* #undef JEMALLOC_CPREFIX */
/*
* Define overrides for non-standard allocator-related functions if they are
* present on the system.
*/
/* #undef JEMALLOC_OVERRIDE___LIBC_CALLOC */
/* #undef JEMALLOC_OVERRIDE___LIBC_FREE */
/* #undef JEMALLOC_OVERRIDE___LIBC_MALLOC */
/* #undef JEMALLOC_OVERRIDE___LIBC_MEMALIGN */
/* #undef JEMALLOC_OVERRIDE___LIBC_REALLOC */
/* #undef JEMALLOC_OVERRIDE___LIBC_VALLOC */
#define JEMALLOC_OVERRIDE___POSIX_MEMALIGN
/*
* JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs.
* For shared libraries, symbol visibility mechanisms prevent these symbols
* from being exported, but for static libraries, naming collisions are a real
* possibility.
*/
#define JEMALLOC_PRIVATE_NAMESPACE je_
/*
* Hyper-threaded CPUs may need a special instruction inside spin loops in
* order to yield to another virtual CPU.
*/
#define CPU_SPINWAIT
/* 1 if CPU_SPINWAIT is defined, 0 otherwise. */
#define HAVE_CPU_SPINWAIT 0
/*
* Number of significant bits in virtual addresses. This may be less than the
* total number of bits in a pointer, e.g. on x64, for which the uppermost 16
* bits are the same as bit 47.
*/
#define LG_VADDR 48
/* Defined if C11 atomics are available. */
#define JEMALLOC_C11_ATOMICS 1
/* Defined if GCC __atomic atomics are available. */
#define JEMALLOC_GCC_ATOMIC_ATOMICS 1
/* and the 8-bit variant support. */
#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS 1
/* Defined if GCC __sync atomics are available. */
#define JEMALLOC_GCC_SYNC_ATOMICS 1
/* and the 8-bit variant support. */
#define JEMALLOC_GCC_U8_SYNC_ATOMICS 1
/*
* Defined if __builtin_clz() and __builtin_clzl() are available.
*/
#define JEMALLOC_HAVE_BUILTIN_CLZ
/*
* Defined if os_unfair_lock_*() functions are available, as provided by Darwin.
*/
/* #undef JEMALLOC_OS_UNFAIR_LOCK */
/* Defined if syscall(2) is usable. */
#define JEMALLOC_USE_SYSCALL
/*
* Defined if secure_getenv(3) is available.
*/
/* #undef JEMALLOC_HAVE_SECURE_GETENV */
/*
* Defined if issetugid(2) is available.
*/
#define JEMALLOC_HAVE_ISSETUGID
/* Defined if pthread_atfork(3) is available. */
#define JEMALLOC_HAVE_PTHREAD_ATFORK
/* Defined if pthread_setname_np(3) is available. */
// Only since 12.1-STABLE
// #define JEMALLOC_HAVE_PTHREAD_SETNAME_NP
/*
* Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available.
*/
/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE */
/*
* Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available.
*/
#define JEMALLOC_HAVE_CLOCK_MONOTONIC 1
/*
* Defined if mach_absolute_time() is available.
*/
/* #undef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME */
/*
* Defined if clock_gettime(CLOCK_REALTIME, ...) is available.
*/
#define JEMALLOC_HAVE_CLOCK_REALTIME 1
/*
* Defined if _malloc_thread_cleanup() exists. At least in the case of
* FreeBSD, pthread_key_create() allocates, which if used during malloc
* bootstrapping will cause recursion into the pthreads library. Therefore, if
* _malloc_thread_cleanup() exists, use it as the basis for thread cleanup in
* malloc_tsd.
*/
#define JEMALLOC_MALLOC_THREAD_CLEANUP
/*
* Defined if threaded initialization is known to be safe on this platform.
* Among other things, it must be possible to initialize a mutex without
* triggering allocation in order for threaded allocation to be safe.
*/
/* #undef JEMALLOC_THREADED_INIT */
/*
* Defined if the pthreads implementation defines
* _pthread_mutex_init_calloc_cb(), in which case the function is used in order
* to avoid recursive allocation during mutex initialization.
*/
#define JEMALLOC_MUTEX_INIT_CB 1
/* Non-empty if the tls_model attribute is supported. */
#define JEMALLOC_TLS_MODEL __attribute__((tls_model("initial-exec")))
/*
* JEMALLOC_DEBUG enables assertions and other sanity checks, and disables
* inline functions.
*/
/* #undef JEMALLOC_DEBUG */
/* JEMALLOC_STATS enables statistics calculation. */
#define JEMALLOC_STATS
/* JEMALLOC_EXPERIMENTAL_SMALLOCX_API enables experimental smallocx API. */
/* #undef JEMALLOC_EXPERIMENTAL_SMALLOCX_API */
/* JEMALLOC_PROF enables allocation profiling. */
/* #undef JEMALLOC_PROF */
/* Use libunwind for profile backtracing if defined. */
/* #undef JEMALLOC_PROF_LIBUNWIND */
/* Use libgcc for profile backtracing if defined. */
/* #undef JEMALLOC_PROF_LIBGCC */
/* Use gcc intrinsics for profile backtracing if defined. */
/* #undef JEMALLOC_PROF_GCC */
/*
* JEMALLOC_DSS enables use of sbrk(2) to allocate extents from the data storage
* segment (DSS).
*/
#define JEMALLOC_DSS
/* Support memory filling (junk/zero). */
#define JEMALLOC_FILL
/* Support utrace(2)-based tracing. */
/* #undef JEMALLOC_UTRACE */
/* Support optional abort() on OOM. */
/* #undef JEMALLOC_XMALLOC */
/* Support lazy locking (avoid locking unless a second thread is launched). */
#define JEMALLOC_LAZY_LOCK
/*
* Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size
* classes).
*/
/* #undef LG_QUANTUM */
/* One page is 2^LG_PAGE bytes. */
#define LG_PAGE 16
/*
* One huge page is 2^LG_HUGEPAGE bytes. Note that this is defined even if the
* system does not explicitly support huge pages; system calls that require
* explicit huge page support are separately configured.
*/
#define LG_HUGEPAGE 29
/*
* If defined, adjacent virtual memory mappings with identical attributes
* automatically coalesce, and they fragment when changes are made to subranges.
* This is the normal order of things for mmap()/munmap(), but on Windows
* VirtualAlloc()/VirtualFree() operations must be precisely matched, i.e.
* mappings do *not* coalesce/fragment.
*/
#define JEMALLOC_MAPS_COALESCE
/*
* If defined, retain memory for later reuse by default rather than using e.g.
* munmap() to unmap freed extents. This is enabled on 64-bit Linux because
* common sequences of mmap()/munmap() calls will cause virtual memory map
* holes.
*/
/* #undef JEMALLOC_RETAIN */
/* TLS is used to map arenas and magazine caches to threads. */
#define JEMALLOC_TLS
/*
* Used to mark unreachable code to quiet "end of non-void" compiler warnings.
* Don't use this directly; instead use unreachable() from util.h
*/
#define JEMALLOC_INTERNAL_UNREACHABLE __builtin_unreachable
/*
* ffs*() functions to use for bitmapping. Don't use these directly; instead,
* use ffs_*() from util.h.
*/
#define JEMALLOC_INTERNAL_FFSLL __builtin_ffsll
#define JEMALLOC_INTERNAL_FFSL __builtin_ffsl
#define JEMALLOC_INTERNAL_FFS __builtin_ffs
/*
* popcount*() functions to use for bitmapping.
*/
#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl
#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount
/*
* If defined, explicitly attempt to more uniformly distribute large allocation
* pointer alignments across all cache indices.
*/
#define JEMALLOC_CACHE_OBLIVIOUS
/*
* If defined, enable logging facilities. We make this a configure option to
* avoid taking extra branches everywhere.
*/
/* #undef JEMALLOC_LOG */
/*
* If defined, use readlinkat() (instead of readlink()) to follow
* /etc/malloc_conf.
*/
/* #undef JEMALLOC_READLINKAT */
/*
* Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings.
*/
/* #undef JEMALLOC_ZONE */
/*
* Methods for determining whether the OS overcommits.
* JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY: Linux's
* /proc/sys/vm.overcommit_memory file.
* JEMALLOC_SYSCTL_VM_OVERCOMMIT: FreeBSD's vm.overcommit sysctl.
*/
#define JEMALLOC_SYSCTL_VM_OVERCOMMIT
/* #undef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY */
/* Defined if madvise(2) is available. */
#define JEMALLOC_HAVE_MADVISE
/*
* Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE
* arguments to madvise(2).
*/
/* #undef JEMALLOC_HAVE_MADVISE_HUGE */
/*
* Methods for purging unused pages differ between operating systems.
*
* madvise(..., MADV_FREE) : This marks pages as being unused, such that they
* will be discarded rather than swapped out.
* madvise(..., MADV_DONTNEED) : If JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS is
* defined, this immediately discards pages,
* such that new pages will be demand-zeroed if
* the address region is later touched;
* otherwise this behaves similarly to
* MADV_FREE, though typically with higher
* system overhead.
*/
#define JEMALLOC_PURGE_MADVISE_FREE
#define JEMALLOC_PURGE_MADVISE_DONTNEED
/* #undef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS */
/* Defined if madvise(2) is available but MADV_FREE is not (x86 Linux only). */
/* #undef JEMALLOC_DEFINE_MADVISE_FREE */
/*
* Defined if MADV_DO[NT]DUMP is supported as an argument to madvise.
*/
/* #undef JEMALLOC_MADVISE_DONTDUMP */
/*
* Defined if transparent huge pages (THPs) are supported via the
* MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled.
*/
/* #undef JEMALLOC_THP */
/* Define if operating system has alloca.h header. */
/* #undef JEMALLOC_HAS_ALLOCA_H */
/* C99 restrict keyword supported. */
#define JEMALLOC_HAS_RESTRICT 1
/* For use by hash code. */
/* #undef JEMALLOC_BIG_ENDIAN */
/* sizeof(int) == 2^LG_SIZEOF_INT. */
#define LG_SIZEOF_INT 2
/* sizeof(long) == 2^LG_SIZEOF_LONG. */
#define LG_SIZEOF_LONG 3
/* sizeof(long long) == 2^LG_SIZEOF_LONG_LONG. */
#define LG_SIZEOF_LONG_LONG 3
/* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */
#define LG_SIZEOF_INTMAX_T 3
/* glibc malloc hooks (__malloc_hook, __realloc_hook, __free_hook). */
/* #undef JEMALLOC_GLIBC_MALLOC_HOOK */
/* glibc memalign hook. */
/* #undef JEMALLOC_GLIBC_MEMALIGN_HOOK */
/* pthread support */
#define JEMALLOC_HAVE_PTHREAD
/* dlsym() support */
#define JEMALLOC_HAVE_DLSYM
/* Adaptive mutex support in pthreads. */
#define JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP
/* GNU specific sched_getcpu support */
/* #undef JEMALLOC_HAVE_SCHED_GETCPU */
/* GNU specific sched_setaffinity support */
/* #undef JEMALLOC_HAVE_SCHED_SETAFFINITY */
/*
* If defined, all the features necessary for background threads are present.
*/
#define JEMALLOC_BACKGROUND_THREAD 1
/*
* If defined, jemalloc symbols are not exported (doesn't work when
* JEMALLOC_PREFIX is not defined).
*/
/* #undef JEMALLOC_EXPORT */
/* config.malloc_conf options string. */
#define JEMALLOC_CONFIG_MALLOC_CONF "@JEMALLOC_CONFIG_MALLOC_CONF@"
/* If defined, jemalloc takes the malloc/free/etc. symbol names. */
#define JEMALLOC_IS_MALLOC 1
/*
* Defined if strerror_r returns char * if _GNU_SOURCE is defined.
*/
/* #undef JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE */
/* Performs additional safety checks when defined. */
/* #undef JEMALLOC_OPT_SAFETY_CHECKS */
#endif /* JEMALLOC_INTERNAL_DEFS_H_ */

View File

@ -0,0 +1,373 @@
/* include/jemalloc/internal/jemalloc_internal_defs.h. Generated from jemalloc_internal_defs.h.in by configure. */
#ifndef JEMALLOC_INTERNAL_DEFS_H_
#define JEMALLOC_INTERNAL_DEFS_H_
/*
* If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all
* public APIs to be prefixed. This makes it possible, with some care, to use
* multiple allocators simultaneously.
*/
/* #undef JEMALLOC_PREFIX */
/* #undef JEMALLOC_CPREFIX */
/*
* Define overrides for non-standard allocator-related functions if they are
* present on the system.
*/
/* #undef JEMALLOC_OVERRIDE___LIBC_CALLOC */
/* #undef JEMALLOC_OVERRIDE___LIBC_FREE */
/* #undef JEMALLOC_OVERRIDE___LIBC_MALLOC */
/* #undef JEMALLOC_OVERRIDE___LIBC_MEMALIGN */
/* #undef JEMALLOC_OVERRIDE___LIBC_REALLOC */
/* #undef JEMALLOC_OVERRIDE___LIBC_VALLOC */
#define JEMALLOC_OVERRIDE___POSIX_MEMALIGN
/*
* JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs.
* For shared libraries, symbol visibility mechanisms prevent these symbols
* from being exported, but for static libraries, naming collisions are a real
* possibility.
*/
#define JEMALLOC_PRIVATE_NAMESPACE je_
/*
* Hyper-threaded CPUs may need a special instruction inside spin loops in
* order to yield to another virtual CPU.
*/
#define CPU_SPINWAIT __asm__ volatile("pause")
/* 1 if CPU_SPINWAIT is defined, 0 otherwise. */
#define HAVE_CPU_SPINWAIT 1
/*
* Number of significant bits in virtual addresses. This may be less than the
* total number of bits in a pointer, e.g. on x64, for which the uppermost 16
* bits are the same as bit 47.
*/
#define LG_VADDR 48
/* Defined if C11 atomics are available. */
#define JEMALLOC_C11_ATOMICS 1
/* Defined if GCC __atomic atomics are available. */
#define JEMALLOC_GCC_ATOMIC_ATOMICS 1
/* and the 8-bit variant support. */
#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS 1
/* Defined if GCC __sync atomics are available. */
#define JEMALLOC_GCC_SYNC_ATOMICS 1
/* and the 8-bit variant support. */
#define JEMALLOC_GCC_U8_SYNC_ATOMICS 1
/*
* Defined if __builtin_clz() and __builtin_clzl() are available.
*/
#define JEMALLOC_HAVE_BUILTIN_CLZ
/*
* Defined if os_unfair_lock_*() functions are available, as provided by Darwin.
*/
/* #undef JEMALLOC_OS_UNFAIR_LOCK */
/* Defined if syscall(2) is usable. */
#define JEMALLOC_USE_SYSCALL
/*
* Defined if secure_getenv(3) is available.
*/
/* #undef JEMALLOC_HAVE_SECURE_GETENV */
/*
* Defined if issetugid(2) is available.
*/
#define JEMALLOC_HAVE_ISSETUGID
/* Defined if pthread_atfork(3) is available. */
#define JEMALLOC_HAVE_PTHREAD_ATFORK
/* Defined if pthread_setname_np(3) is available. */
// Only since 12.1-STABLE
// #define JEMALLOC_HAVE_PTHREAD_SETNAME_NP
/*
* Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available.
*/
/* #undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE */
/*
* Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available.
*/
#define JEMALLOC_HAVE_CLOCK_MONOTONIC 1
/*
* Defined if mach_absolute_time() is available.
*/
/* #undef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME */
/*
* Defined if clock_gettime(CLOCK_REALTIME, ...) is available.
*/
#define JEMALLOC_HAVE_CLOCK_REALTIME 1
/*
* Defined if _malloc_thread_cleanup() exists. At least in the case of
* FreeBSD, pthread_key_create() allocates, which if used during malloc
* bootstrapping will cause recursion into the pthreads library. Therefore, if
* _malloc_thread_cleanup() exists, use it as the basis for thread cleanup in
* malloc_tsd.
*/
#define JEMALLOC_MALLOC_THREAD_CLEANUP
/*
* Defined if threaded initialization is known to be safe on this platform.
* Among other things, it must be possible to initialize a mutex without
* triggering allocation in order for threaded allocation to be safe.
*/
/* #undef JEMALLOC_THREADED_INIT */
/*
* Defined if the pthreads implementation defines
* _pthread_mutex_init_calloc_cb(), in which case the function is used in order
* to avoid recursive allocation during mutex initialization.
*/
#define JEMALLOC_MUTEX_INIT_CB 1
/* Non-empty if the tls_model attribute is supported. */
#define JEMALLOC_TLS_MODEL __attribute__((tls_model("initial-exec")))
/*
* JEMALLOC_DEBUG enables assertions and other sanity checks, and disables
* inline functions.
*/
/* #undef JEMALLOC_DEBUG */
/* JEMALLOC_STATS enables statistics calculation. */
#define JEMALLOC_STATS
/* JEMALLOC_EXPERIMENTAL_SMALLOCX_API enables experimental smallocx API. */
/* #undef JEMALLOC_EXPERIMENTAL_SMALLOCX_API */
/* JEMALLOC_PROF enables allocation profiling. */
/* #undef JEMALLOC_PROF */
/* Use libunwind for profile backtracing if defined. */
/* #undef JEMALLOC_PROF_LIBUNWIND */
/* Use libgcc for profile backtracing if defined. */
/* #undef JEMALLOC_PROF_LIBGCC */
/* Use gcc intrinsics for profile backtracing if defined. */
/* #undef JEMALLOC_PROF_GCC */
/*
* JEMALLOC_DSS enables use of sbrk(2) to allocate extents from the data storage
* segment (DSS).
*/
#define JEMALLOC_DSS
/* Support memory filling (junk/zero). */
#define JEMALLOC_FILL
/* Support utrace(2)-based tracing. */
/* #undef JEMALLOC_UTRACE */
/* Support optional abort() on OOM. */
/* #undef JEMALLOC_XMALLOC */
/* Support lazy locking (avoid locking unless a second thread is launched). */
#define JEMALLOC_LAZY_LOCK
/*
* Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size
* classes).
*/
/* #undef LG_QUANTUM */
/* One page is 2^LG_PAGE bytes. */
#define LG_PAGE 12
/*
* One huge page is 2^LG_HUGEPAGE bytes. Note that this is defined even if the
* system does not explicitly support huge pages; system calls that require
* explicit huge page support are separately configured.
*/
#define LG_HUGEPAGE 21
/*
* If defined, adjacent virtual memory mappings with identical attributes
* automatically coalesce, and they fragment when changes are made to subranges.
* This is the normal order of things for mmap()/munmap(), but on Windows
* VirtualAlloc()/VirtualFree() operations must be precisely matched, i.e.
* mappings do *not* coalesce/fragment.
*/
#define JEMALLOC_MAPS_COALESCE
/*
* If defined, retain memory for later reuse by default rather than using e.g.
* munmap() to unmap freed extents. This is enabled on 64-bit Linux because
* common sequences of mmap()/munmap() calls will cause virtual memory map
* holes.
*/
/* #undef JEMALLOC_RETAIN */
/* TLS is used to map arenas and magazine caches to threads. */
#define JEMALLOC_TLS
/*
* Used to mark unreachable code to quiet "end of non-void" compiler warnings.
* Don't use this directly; instead use unreachable() from util.h
*/
#define JEMALLOC_INTERNAL_UNREACHABLE __builtin_unreachable
/*
* ffs*() functions to use for bitmapping. Don't use these directly; instead,
* use ffs_*() from util.h.
*/
#define JEMALLOC_INTERNAL_FFSLL __builtin_ffsll
#define JEMALLOC_INTERNAL_FFSL __builtin_ffsl
#define JEMALLOC_INTERNAL_FFS __builtin_ffs
/*
* popcount*() functions to use for bitmapping.
*/
#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl
#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount
/*
* If defined, explicitly attempt to more uniformly distribute large allocation
* pointer alignments across all cache indices.
*/
#define JEMALLOC_CACHE_OBLIVIOUS
/*
* If defined, enable logging facilities. We make this a configure option to
* avoid taking extra branches everywhere.
*/
/* #undef JEMALLOC_LOG */
/*
* If defined, use readlinkat() (instead of readlink()) to follow
* /etc/malloc_conf.
*/
/* #undef JEMALLOC_READLINKAT */
/*
* Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings.
*/
/* #undef JEMALLOC_ZONE */
/*
* Methods for determining whether the OS overcommits.
* JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY: Linux's
* /proc/sys/vm.overcommit_memory file.
* JEMALLOC_SYSCTL_VM_OVERCOMMIT: FreeBSD's vm.overcommit sysctl.
*/
#define JEMALLOC_SYSCTL_VM_OVERCOMMIT
/* #undef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY */
/* Defined if madvise(2) is available. */
#define JEMALLOC_HAVE_MADVISE
/*
* Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE
* arguments to madvise(2).
*/
/* #undef JEMALLOC_HAVE_MADVISE_HUGE */
/*
* Methods for purging unused pages differ between operating systems.
*
* madvise(..., MADV_FREE) : This marks pages as being unused, such that they
* will be discarded rather than swapped out.
* madvise(..., MADV_DONTNEED) : If JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS is
* defined, this immediately discards pages,
* such that new pages will be demand-zeroed if
* the address region is later touched;
* otherwise this behaves similarly to
* MADV_FREE, though typically with higher
* system overhead.
*/
#define JEMALLOC_PURGE_MADVISE_FREE
#define JEMALLOC_PURGE_MADVISE_DONTNEED
/* #undef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS */
/* Defined if madvise(2) is available but MADV_FREE is not (x86 Linux only). */
/* #undef JEMALLOC_DEFINE_MADVISE_FREE */
/*
* Defined if MADV_DO[NT]DUMP is supported as an argument to madvise.
*/
/* #undef JEMALLOC_MADVISE_DONTDUMP */
/*
* Defined if transparent huge pages (THPs) are supported via the
* MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled.
*/
/* #undef JEMALLOC_THP */
/* Define if operating system has alloca.h header. */
/* #undef JEMALLOC_HAS_ALLOCA_H */
/* C99 restrict keyword supported. */
#define JEMALLOC_HAS_RESTRICT 1
/* For use by hash code. */
/* #undef JEMALLOC_BIG_ENDIAN */
/* sizeof(int) == 2^LG_SIZEOF_INT. */
#define LG_SIZEOF_INT 2
/* sizeof(long) == 2^LG_SIZEOF_LONG. */
#define LG_SIZEOF_LONG 3
/* sizeof(long long) == 2^LG_SIZEOF_LONG_LONG. */
#define LG_SIZEOF_LONG_LONG 3
/* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */
#define LG_SIZEOF_INTMAX_T 3
/* glibc malloc hooks (__malloc_hook, __realloc_hook, __free_hook). */
/* #undef JEMALLOC_GLIBC_MALLOC_HOOK */
/* glibc memalign hook. */
/* #undef JEMALLOC_GLIBC_MEMALIGN_HOOK */
/* pthread support */
#define JEMALLOC_HAVE_PTHREAD
/* dlsym() support */
#define JEMALLOC_HAVE_DLSYM
/* Adaptive mutex support in pthreads. */
#define JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP
/* GNU specific sched_getcpu support */
/* #undef JEMALLOC_HAVE_SCHED_GETCPU */
/* GNU specific sched_setaffinity support */
/* #undef JEMALLOC_HAVE_SCHED_SETAFFINITY */
/*
* If defined, all the features necessary for background threads are present.
*/
#define JEMALLOC_BACKGROUND_THREAD 1
/*
* If defined, jemalloc symbols are not exported (doesn't work when
* JEMALLOC_PREFIX is not defined).
*/
/* #undef JEMALLOC_EXPORT */
/* config.malloc_conf options string. */
#define JEMALLOC_CONFIG_MALLOC_CONF "@JEMALLOC_CONFIG_MALLOC_CONF@"
/* If defined, jemalloc takes the malloc/free/etc. symbol names. */
#define JEMALLOC_IS_MALLOC 1
/*
* Defined if strerror_r returns char * if _GNU_SOURCE is defined.
*/
/* #undef JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE */
/* Performs additional safety checks when defined. */
/* #undef JEMALLOC_OPT_SAFETY_CHECKS */
#endif /* JEMALLOC_INTERNAL_DEFS_H_ */

View File

@ -35,7 +35,7 @@
*/ */
#define CPU_SPINWAIT #define CPU_SPINWAIT
/* 1 if CPU_SPINWAIT is defined, 0 otherwise. */ /* 1 if CPU_SPINWAIT is defined, 0 otherwise. */
#define HAVE_CPU_SPINWAIT 9 #define HAVE_CPU_SPINWAIT 0
/* /*
* Number of significant bits in virtual addresses. This may be less than the * Number of significant bits in virtual addresses. This may be less than the

View File

@ -1,213 +0,0 @@
#ifndef JEMALLOC_PREAMBLE_H
#define JEMALLOC_PREAMBLE_H
#include "jemalloc_internal_defs.h"
#include "jemalloc/internal/jemalloc_internal_decls.h"
#ifdef JEMALLOC_UTRACE
#include <sys/ktrace.h>
#endif
#define JEMALLOC_NO_DEMANGLE
#ifdef JEMALLOC_JET
# undef JEMALLOC_IS_MALLOC
# define JEMALLOC_N(n) jet_##n
# include "jemalloc/internal/public_namespace.h"
# define JEMALLOC_NO_RENAME
# include "jemalloc/jemalloc.h"
# undef JEMALLOC_NO_RENAME
#else
# define JEMALLOC_N(n) je_##n
# include "jemalloc/jemalloc.h"
#endif
#if defined(JEMALLOC_OSATOMIC)
#include <libkern/OSAtomic.h>
#endif
#ifdef JEMALLOC_ZONE
#include <mach/mach_error.h>
#include <mach/mach_init.h>
#include <mach/vm_map.h>
#endif
#include "jemalloc/internal/jemalloc_internal_macros.h"
/*
* Note that the ordering matters here; the hook itself is name-mangled. We
* want the inclusion of hooks to happen early, so that we hook as much as
* possible.
*/
#ifndef JEMALLOC_NO_PRIVATE_NAMESPACE
# ifndef JEMALLOC_JET
# include "jemalloc/internal/private_namespace.h"
# else
# include "jemalloc/internal/private_namespace_jet.h"
# endif
#endif
#include "jemalloc/internal/test_hooks.h"
#ifdef JEMALLOC_DEFINE_MADVISE_FREE
# define JEMALLOC_MADV_FREE 8
#endif
static const bool config_debug =
#ifdef JEMALLOC_DEBUG
true
#else
false
#endif
;
static const bool have_dss =
#ifdef JEMALLOC_DSS
true
#else
false
#endif
;
static const bool have_madvise_huge =
#ifdef JEMALLOC_HAVE_MADVISE_HUGE
true
#else
false
#endif
;
static const bool config_fill =
#ifdef JEMALLOC_FILL
true
#else
false
#endif
;
static const bool config_lazy_lock =
#ifdef JEMALLOC_LAZY_LOCK
true
#else
false
#endif
;
static const char * const config_malloc_conf = JEMALLOC_CONFIG_MALLOC_CONF;
static const bool config_prof =
#ifdef JEMALLOC_PROF
true
#else
false
#endif
;
static const bool config_prof_libgcc =
#ifdef JEMALLOC_PROF_LIBGCC
true
#else
false
#endif
;
static const bool config_prof_libunwind =
#ifdef JEMALLOC_PROF_LIBUNWIND
true
#else
false
#endif
;
static const bool maps_coalesce =
#ifdef JEMALLOC_MAPS_COALESCE
true
#else
false
#endif
;
static const bool config_stats =
#ifdef JEMALLOC_STATS
true
#else
false
#endif
;
static const bool config_tls =
#ifdef JEMALLOC_TLS
true
#else
false
#endif
;
static const bool config_utrace =
#ifdef JEMALLOC_UTRACE
true
#else
false
#endif
;
static const bool config_xmalloc =
#ifdef JEMALLOC_XMALLOC
true
#else
false
#endif
;
static const bool config_cache_oblivious =
#ifdef JEMALLOC_CACHE_OBLIVIOUS
true
#else
false
#endif
;
/*
* Undocumented, for jemalloc development use only at the moment. See the note
* in jemalloc/internal/log.h.
*/
static const bool config_log =
#ifdef JEMALLOC_LOG
true
#else
false
#endif
;
/*
* Are extra safety checks enabled; things like checking the size of sized
* deallocations, double-frees, etc.
*/
static const bool config_opt_safety_checks =
#ifdef JEMALLOC_OPT_SAFETY_CHECKS
true
#elif defined(JEMALLOC_DEBUG)
/*
* This lets us only guard safety checks by one flag instead of two; fast
* checks can guard solely by config_opt_safety_checks and run in debug mode
* too.
*/
true
#else
false
#endif
;
#if defined(_WIN32) || defined(JEMALLOC_HAVE_SCHED_GETCPU)
/* Currently percpu_arena depends on sched_getcpu. */
#define JEMALLOC_PERCPU_ARENA
#endif
static const bool have_percpu_arena =
#ifdef JEMALLOC_PERCPU_ARENA
true
#else
false
#endif
;
/*
* Undocumented, and not recommended; the application should take full
* responsibility for tracking provenance.
*/
static const bool force_ivsalloc =
#ifdef JEMALLOC_FORCE_IVSALLOC
true
#else
false
#endif
;
static const bool have_background_thread =
#ifdef JEMALLOC_BACKGROUND_THREAD
true
#else
false
#endif
;
#endif /* JEMALLOC_PREAMBLE_H */

1
contrib/sentry-native vendored Submodule

@ -0,0 +1 @@
Subproject commit f91ed3f95b5653f247189d720ab00765b4899d6f

View File

@ -6,7 +6,6 @@
"docker/test/compatibility/ubuntu": "yandex/clickhouse-test-old-ubuntu", "docker/test/compatibility/ubuntu": "yandex/clickhouse-test-old-ubuntu",
"docker/test/integration/base": "yandex/clickhouse-integration-test", "docker/test/integration/base": "yandex/clickhouse-integration-test",
"docker/test/performance-comparison": "yandex/clickhouse-performance-comparison", "docker/test/performance-comparison": "yandex/clickhouse-performance-comparison",
"docker/test/pvs": "yandex/clickhouse-pvs-test",
"docker/test/stateful": "yandex/clickhouse-stateful-test", "docker/test/stateful": "yandex/clickhouse-stateful-test",
"docker/test/stateful_with_coverage": "yandex/clickhouse-stateful-test-with-coverage", "docker/test/stateful_with_coverage": "yandex/clickhouse-stateful-test-with-coverage",
"docker/test/stateless": "yandex/clickhouse-stateless-test", "docker/test/stateless": "yandex/clickhouse-stateless-test",

View File

@ -59,14 +59,21 @@ ENV CC=clang-10
ENV CXX=clang++-10 ENV CXX=clang++-10
# libtapi is required to support .tbh format from recent MacOS SDKs # libtapi is required to support .tbh format from recent MacOS SDKs
RUN git clone https://github.com/tpoechtrager/apple-libtapi.git RUN git clone https://github.com/tpoechtrager/apple-libtapi.git \
RUN cd apple-libtapi && INSTALLPREFIX=/cctools ./build.sh && ./install.sh && cd apple-libtapi \
RUN rm -rf apple-libtapi && INSTALLPREFIX=/cctools ./build.sh \
&& ./install.sh \
&& cd .. \
&& rm -rf apple-libtapi
# Build and install tools for cross-linking to Darwin # Build and install tools for cross-linking to Darwin
RUN git clone https://github.com/tpoechtrager/cctools-port.git RUN git clone https://github.com/tpoechtrager/cctools-port.git \
RUN cd cctools-port/cctools && ./configure --prefix=/cctools --with-libtapi=/cctools --target=x86_64-apple-darwin && make install && cd cctools-port/cctools \
RUN rm -rf cctools-port && ./configure --prefix=/cctools --with-libtapi=/cctools \
--target=x86_64-apple-darwin \
&& make install \
&& cd ../.. \
&& rm -rf cctools-port
# Download toolchain for Darwin # Download toolchain for Darwin
RUN wget https://github.com/phracker/MacOSX-SDKs/releases/download/10.14-beta4/MacOSX10.14.sdk.tar.xz RUN wget https://github.com/phracker/MacOSX-SDKs/releases/download/10.14-beta4/MacOSX10.14.sdk.tar.xz

View File

@ -17,8 +17,8 @@ ccache --show-stats ||:
ccache --zero-stats ||: ccache --zero-stats ||:
ln -s /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/libOpenCL.so ||: ln -s /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/libOpenCL.so ||:
rm -f CMakeCache.txt rm -f CMakeCache.txt
cmake .. -LA -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DSANITIZE=$SANITIZER $CMAKE_FLAGS cmake --debug-trycompile --verbose=1 -DCMAKE_VERBOSE_MAKEFILE=1 -LA -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DSANITIZE=$SANITIZER $CMAKE_FLAGS ..
ninja clickhouse-bundle ninja -v clickhouse-bundle
mv ./programs/clickhouse* /output mv ./programs/clickhouse* /output
mv ./src/unit_tests_dbms /output mv ./src/unit_tests_dbms /output
find . -name '*.so' -print -exec mv '{}' /output \; find . -name '*.so' -print -exec mv '{}' /output \;

View File

@ -27,6 +27,7 @@ RUN apt-get update \
luajit \ luajit \
libssl-dev \ libssl-dev \
gdb \ gdb \
virtualenv \
&& rm -rf \ && rm -rf \
/var/lib/apt/lists/* \ /var/lib/apt/lists/* \
/var/cache/debconf \ /var/cache/debconf \
@ -35,8 +36,9 @@ RUN apt-get update \
ENV TZ=Europe/Moscow ENV TZ=Europe/Moscow
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
RUN mkdir /venv && virtualenv /venv
RUN pip install urllib3==1.23 pytest docker-compose==1.22.0 docker dicttoxml kazoo PyMySQL psycopg2==2.7.5 pymongo tzlocal kafka-python protobuf redis aerospike pytest-timeout minio rpm-confluent-schemaregistry grpcio grpcio-tools cassandra-driver RUN /bin/bash -c "source /venv/bin/activate && pip install requests urllib3 pytest docker-compose==1.22.0 docker dicttoxml kazoo PyMySQL psycopg2-binary==2.7.5 pymongo tzlocal kafka-python protobuf redis aerospike pytest-timeout minio rpm-confluent-schemaregistry grpcio grpcio-tools cassandra-driver"
ENV DOCKER_CHANNEL stable ENV DOCKER_CHANNEL stable
ENV DOCKER_VERSION 17.09.1-ce ENV DOCKER_VERSION 17.09.1-ce
@ -73,5 +75,4 @@ RUN set -x \
VOLUME /var/lib/docker VOLUME /var/lib/docker
EXPOSE 2375 EXPOSE 2375
ENTRYPOINT ["dockerd-entrypoint.sh"] ENTRYPOINT ["dockerd-entrypoint.sh"]
CMD ["sh", "-c", "pytest $PYTEST_OPTS"] CMD ["bash", "-c", "source /venv/bin/activate && pytest $PYTEST_OPTS"]

View File

@ -12,6 +12,8 @@ RUN apt-get update \
g++ \ g++ \
gdb \ gdb \
git \ git \
gnuplot \
imagemagick \
libc6-dbg \ libc6-dbg \
moreutils \ moreutils \
ncdu \ ncdu \

View File

@ -36,18 +36,14 @@ function configure
while killall clickhouse-server; do echo . ; sleep 1 ; done while killall clickhouse-server; do echo . ; sleep 1 ; done
echo all killed echo all killed
# Remove logs etc, because they will be updated, and sharing them between
# servers with hardlink might cause unpredictable behavior.
rm db0/data/system/* -rf ||:
rm db0/metadata/system/* -rf ||:
# Make copies of the original db for both servers. Use hardlinks instead # Make copies of the original db for both servers. Use hardlinks instead
# of copying. Be careful to remove preprocessed configs and system tables,or # of copying to save space. Before that, remove preprocessed configs and
# it can lead to weird effects. # system tables, because sharing them between servers with hardlinks may
# lead to weird effects.
rm -r left/db ||: rm -r left/db ||:
rm -r right/db ||: rm -r right/db ||:
rm -r db0/preprocessed_configs ||: rm -r db0/preprocessed_configs ||:
rm -r db/{data,metadata}/system ||: rm -r db0/{data,metadata}/system ||:
cp -al db0/ left/db/ cp -al db0/ left/db/
cp -al db0/ right/db/ cp -al db0/ right/db/
} }
@ -131,6 +127,11 @@ function run_tests
test_files=$(ls "$test_prefix"/*.xml) test_files=$(ls "$test_prefix"/*.xml)
fi fi
# Determine which concurrent benchmarks to run. For now, the only test
# we run as a concurrent benchmark is 'website'. Run it as benchmark if we
# are also going to run it as a normal test.
for test in $test_files; do echo $test; done | sed -n '/website/p' > benchmarks-to-run.txt
# Delete old report files. # Delete old report files.
for x in {test-times,wall-clock-times}.tsv for x in {test-times,wall-clock-times}.tsv
do do
@ -138,15 +139,18 @@ function run_tests
touch "$x" touch "$x"
done done
# Randomize test order.
test_files=$(for f in $test_files; do echo "$f"; done | sort -R)
# Run the tests. # Run the tests.
test_name="<none>" test_name="<none>"
for test in $test_files for test in $test_files
do do
# Check that both servers are alive, to fail faster if they die. # Check that both servers are alive, and restart them if they die.
clickhouse-client --port 9001 --query "select 1 format Null" \ clickhouse-client --port 9001 --query "select 1 format Null" \
|| { echo $test_name >> left-server-died.log ; restart ; continue ; } || { echo $test_name >> left-server-died.log ; restart ; }
clickhouse-client --port 9002 --query "select 1 format Null" \ clickhouse-client --port 9002 --query "select 1 format Null" \
|| { echo $test_name >> right-server-died.log ; restart ; continue ; } || { echo $test_name >> right-server-died.log ; restart ; }
test_name=$(basename "$test" ".xml") test_name=$(basename "$test" ".xml")
echo test "$test_name" echo test "$test_name"
@ -161,6 +165,30 @@ function run_tests
wait wait
} }
# Run some queries concurrently and report the resulting TPS. This additional
# (relatively) short test helps detect concurrency-related effects, because the
# main performance comparison testing is done query-by-query.
function run_benchmark
{
rm -rf benchmark ||:
mkdir benchmark ||:
# The list is built by run_tests.
for file in $(cat benchmarks-to-run.txt)
do
name=$(basename "$file" ".xml")
"$script_dir/perf.py" --print-queries "$file" > "benchmark/$name-queries.txt"
"$script_dir/perf.py" --print-settings "$file" > "benchmark/$name-settings.txt"
readarray -t settings < "benchmark/$name-settings.txt"
command=(clickhouse-benchmark --concurrency 6 --cumulative --iterations 1000 --randomize 1 --delay 0 --continue_on_errors "${settings[@]}")
"${command[@]}" --port 9001 --json "benchmark/$name-left.json" < "benchmark/$name-queries.txt"
"${command[@]}" --port 9002 --json "benchmark/$name-right.json" < "benchmark/$name-queries.txt"
done
}
function get_profiles_watchdog function get_profiles_watchdog
{ {
sleep 6000 sleep 6000
@ -188,10 +216,13 @@ function get_profiles
# Collect the profiles # Collect the profiles
clickhouse-client --port 9001 --query "set query_profiler_cpu_time_period_ns = 0" clickhouse-client --port 9001 --query "set query_profiler_cpu_time_period_ns = 0"
clickhouse-client --port 9001 --query "set query_profiler_real_time_period_ns = 0" clickhouse-client --port 9001 --query "set query_profiler_real_time_period_ns = 0"
clickhouse-client --port 9001 --query "set query_profiler_cpu_time_period_ns = 0" clickhouse-client --port 9001 --query "system flush logs" &
clickhouse-client --port 9001 --query "set query_profiler_real_time_period_ns = 0"
clickhouse-client --port 9001 --query "system flush logs" clickhouse-client --port 9002 --query "set query_profiler_cpu_time_period_ns = 0"
clickhouse-client --port 9002 --query "system flush logs" clickhouse-client --port 9002 --query "set query_profiler_real_time_period_ns = 0"
clickhouse-client --port 9002 --query "system flush logs" &
wait
clickhouse-client --port 9001 --query "select * from system.query_log where type = 2 format TSVWithNamesAndTypes" > left-query-log.tsv ||: & clickhouse-client --port 9001 --query "select * from system.query_log where type = 2 format TSVWithNamesAndTypes" > left-query-log.tsv ||: &
clickhouse-client --port 9001 --query "select * from system.query_thread_log format TSVWithNamesAndTypes" > left-query-thread-log.tsv ||: & clickhouse-client --port 9001 --query "select * from system.query_thread_log format TSVWithNamesAndTypes" > left-query-thread-log.tsv ||: &
@ -219,7 +250,7 @@ function build_log_column_definitions
{ {
# FIXME This loop builds column definitons from TSVWithNamesAndTypes in an # FIXME This loop builds column definitons from TSVWithNamesAndTypes in an
# absolutely atrocious way. This should be done by the file() function itself. # absolutely atrocious way. This should be done by the file() function itself.
for x in {right,left}-{addresses,{query,query-thread,trace,metric}-log}.tsv for x in {right,left}-{addresses,{query,query-thread,trace,{async-,}metric}-log}.tsv
do do
paste -d' ' \ paste -d' ' \
<(sed -n '1{s/\t/\n/g;p;q}' "$x" | sed 's/\(^.*$\)/"\1"/') \ <(sed -n '1{s/\t/\n/g;p;q}' "$x" | sed 's/\(^.*$\)/"\1"/') \
@ -264,25 +295,54 @@ create view right_query_log as select *
from file('right-query-log.tsv', TSVWithNamesAndTypes, from file('right-query-log.tsv', TSVWithNamesAndTypes,
'$(cat "right-query-log.tsv.columns")'); '$(cat "right-query-log.tsv.columns")');
create table query_metrics engine File(TSV, -- do not add header -- will parse with grep create view query_logs as
'analyze/query-run-metrics.tsv') select *, 0 version from left_query_log
as select
test, query_index, 0 run, version,
[
-- server-reported time
query_duration_ms / toFloat64(1000)
, toFloat64(memory_usage)
-- client-reported time
, query_runs.time
] metrics
from (
select query_duration_ms, memory_usage, query_id, 0 version from left_query_log
union all union all
select query_duration_ms, memory_usage, query_id, 1 version from right_query_log select *, 1 version from right_query_log
) query_logs ;
create table query_run_metrics_full engine File(TSV, 'analyze/query-run-metrics-full.tsv')
as
with (
-- sumMapState with the list of all keys with '-0.' values. Negative zero is because
-- sumMap removes keys with positive zeros.
with (select groupUniqArrayArray(ProfileEvents.Names) from query_logs) as all_names
select arrayReduce('sumMapState', [(all_names, arrayMap(x->-0., all_names))])
) as all_metrics
select test, query_index, version, query_id,
(finalizeAggregation(
arrayReduce('sumMapMergeState',
[
all_metrics,
arrayReduce('sumMapState',
[(ProfileEvents.Names,
arrayMap(x->toFloat64(x), ProfileEvents.Values))]
),
arrayReduce('sumMapState', [(
['client_time', 'server_time'],
arrayMap(x->if(x != 0., x, -0.), [
toFloat64(query_runs.time),
toFloat64(query_duration_ms / 1000.)]))])
]
)) as metrics_tuple).1 metric_names,
metrics_tuple.2 metric_values
from query_logs
right join query_runs right join query_runs
using (query_id, version) on query_logs.query_id = query_runs.query_id
order by test, query_index and query_logs.version = query_runs.version
;
create table query_run_metrics engine File(
TSV, -- do not add header -- will parse with grep
'analyze/query-run-metrics.tsv')
as select test, query_index, 0 run, version, metric_values
from query_run_metrics_full
where test = 'arithmetic'
order by test, query_index, run, version
;
create table query_run_metric_names engine File(TSV, 'analyze/query-run-metric-names.tsv')
as select metric_names from query_run_metrics_full limit 1
; ;
" "
@ -292,7 +352,6 @@ create table query_metrics engine File(TSV, -- do not add header -- will parse w
# query. We also don't have lateral joins. So I just put all runs of each # query. We also don't have lateral joins. So I just put all runs of each
# query into a separate file, and then compute randomization distribution # query into a separate file, and then compute randomization distribution
# for each file. I do this in parallel using GNU parallel. # for each file. I do this in parallel using GNU parallel.
query_index=1
IFS=$'\n' IFS=$'\n'
for prefix in $(cut -f1,2 "analyze/query-run-metrics.tsv" | sort | uniq) for prefix in $(cut -f1,2 "analyze/query-run-metrics.tsv" | sort | uniq)
do do
@ -332,20 +391,34 @@ create view query_display_names as select * from
'test text, query_index int, query_display_name text') 'test text, query_index int, query_display_name text')
; ;
create table query_metric_stats engine File(TSVWithNamesAndTypes, -- WITH, ARRAY JOIN and CROSS JOIN do not like each other:
'report/query-metric-stats.tsv') as -- https://github.com/ClickHouse/ClickHouse/issues/11868
-- https://github.com/ClickHouse/ClickHouse/issues/11757
-- Because of this, we make a view with arrays first, and then apply all the
-- array joins.
create view query_metric_stat_arrays as
with (select * from file('analyze/query-run-metric-names.tsv',
TSV, 'n Array(String)')) as metric_name
select metric_name, left, right, diff, stat_threshold, test, query_index, select metric_name, left, right, diff, stat_threshold, test, query_index,
query_display_name query_display_name
from file ('analyze/query-reports.tsv', TSV, 'left Array(float), from file ('analyze/query-reports.tsv', TSV, 'left Array(float),
right Array(float), diff Array(float), stat_threshold Array(float), right Array(float), diff Array(float), stat_threshold Array(float),
test text, query_index int') reports test text, query_index int') reports
left array join ['server_time', 'memory', 'client_time'] as metric_name,
left, right, diff, stat_threshold
left join query_display_names left join query_display_names
on reports.test = query_display_names.test on reports.test = query_display_names.test
and reports.query_index = query_display_names.query_index and reports.query_index = query_display_names.query_index
; ;
create table query_metric_stats engine File(TSVWithNamesAndTypes,
'report/query-metric-stats.tsv')
as
select metric_name, left, right, diff, stat_threshold, test, query_index,
query_display_name
from query_metric_stat_arrays
left array join metric_name, left, right, diff, stat_threshold
;
-- Main statistics for queries -- query time as reported in query log. -- Main statistics for queries -- query time as reported in query log.
create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv') create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv')
as select as select
@ -364,7 +437,8 @@ create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv')
not short and not changed_show and stat_threshold > report_threshold - 0.05 as unstable_show, not short and not changed_show and stat_threshold > report_threshold - 0.05 as unstable_show,
left, right, diff, stat_threshold, left, right, diff, stat_threshold,
if(report_threshold > 0, report_threshold, 0.10) as report_threshold, --if(report_threshold > 0, report_threshold, 0.10) as report_threshold,
0.10 as report_threshold,
test, query_index, query_display_name test, query_index, query_display_name
from query_metric_stats from query_metric_stats
left join file('analyze/report-thresholds.tsv', TSV, left join file('analyze/report-thresholds.tsv', TSV,
@ -712,6 +786,64 @@ unset IFS
grep -H -m2 -i '\(Exception\|Error\):[^:]' ./*-err.log | sed 's/:/\t/' >> run-errors.tsv ||: grep -H -m2 -i '\(Exception\|Error\):[^:]' ./*-err.log | sed 's/:/\t/' >> run-errors.tsv ||:
} }
function report_metrics
{
rm -rf metrics ||:
mkdir metrics
clickhouse-local --stacktrace --verbose --query "
create view right_async_metric_log as
select * from file('right-async-metric-log.tsv', TSVWithNamesAndTypes,
'event_date Date, event_time DateTime, name String, value Float64')
;
-- Use the right log as time reference because it may have higher precision.
create table metrics engine File(TSV, 'metrics/metrics.tsv') as
with (select min(event_time) from right_async_metric_log) as min_time
select name metric, r.event_time - min_time event_time, l.value as left, r.value as right
from right_async_metric_log r
asof join file('left-async-metric-log.tsv', TSVWithNamesAndTypes,
'event_date Date, event_time DateTime, name String, value Float64') l
on l.name = r.name and r.event_time <= l.event_time
order by metric, event_time
;
-- Show metrics that have changed
create table changes engine File(TSV, 'metrics/changes.tsv') as
select metric, median(left) as left, median(right) as right,
floor((right - left) / left, 3) diff,
floor(if(left > right, left / right, right / left), 3) times_diff
from metrics
group by metric
having abs(diff) > 0.05 and isFinite(diff)
order by diff desc
;
"
IFS=$'\n'
for prefix in $(cut -f1 "metrics/metrics.tsv" | sort | uniq)
do
file="metrics/$prefix.tsv"
grep "^$prefix " "metrics/metrics.tsv" | cut -f2- > "$file"
gnuplot -e "
set datafile separator '\t';
set terminal png size 960,540;
set xtics time format '%tH:%tM';
set title '$prefix' noenhanced offset 0,-3;
set key left top;
plot
'$file' using 1:2 with lines title 'Left'
, '$file' using 1:3 with lines title 'Right'
;
" \
| convert - -filter point -resize "200%" "metrics/$prefix.png" &
done
wait
unset IFS
}
# Check that local and client are in PATH # Check that local and client are in PATH
clickhouse-local --version > /dev/null clickhouse-local --version > /dev/null
clickhouse-client --version > /dev/null clickhouse-client --version > /dev/null
@ -729,13 +861,28 @@ case "$stage" in
# Ignore the errors to collect the log and build at least some report, anyway # Ignore the errors to collect the log and build at least some report, anyway
time run_tests ||: time run_tests ||:
;& ;&
"run_benchmark")
time run_benchmark 2> >(tee -a run-errors.tsv 1>&2) ||:
;&
"get_profiles") "get_profiles")
# Getting profiles inexplicably hangs sometimes, so try to save some logs if # Check for huge pages.
# this happens again. Give the servers some time to collect all info, then cat /sys/kernel/mm/transparent_hugepage/enabled > thp-enabled.txt ||:
# trace and kill. Start in a subshell, so that both function don't interfere cat /proc/meminfo > meminfo.txt ||:
# with each other's jobs through `wait`. Also make the subshell have its own for pid in $(pgrep -f clickhouse-server)
# process group, so that we can then kill it with all its child processes. do
# Somehow it doesn't kill the children by itself when dying. cat "/proc/$pid/smaps" > "$pid-smaps.txt" ||:
done
# Sleep for five minutes to see how the servers enter a quiescent state (e.g.
# how fast the memory usage drops).
sleep 300
# We had a bug where getting profiles froze sometimes, so try to save some
# logs if this happens again. Give the servers some time to collect all info,
# then trace and kill. Start in a subshell, so that both function don't
# interfere with each other's jobs through `wait`. Also make the subshell
# have its own process group, so that we can then kill it with all its child
# processes. Somehow it doesn't kill the children by itself when dying.
set -m set -m
( get_profiles_watchdog ) & ( get_profiles_watchdog ) &
watchdog_pid=$! watchdog_pid=$!
@ -762,7 +909,11 @@ case "$stage" in
;& ;&
"report") "report")
time report ||: time report ||:
;&
"report_metrics")
time report_metrics ||:
;&
"report_html")
time "$script_dir/report.py" --report=all-queries > all-queries.html 2> >(tee -a report/errors.log 1>&2) ||: time "$script_dir/report.py" --report=all-queries > all-queries.html 2> >(tee -a report/errors.log 1>&2) ||:
time "$script_dir/report.py" > report.html time "$script_dir/report.py" > report.html
;& ;&

View File

@ -27,11 +27,11 @@ function download
# might have the same version on left and right # might have the same version on left and right
if ! [ "$left_sha" = "$right_sha" ] if ! [ "$left_sha" = "$right_sha" ]
then then
wget -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$left_pr/$left_sha/performance/performance.tgz" -O- | tar -C left --strip-components=1 -zxv & wget -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$left_pr/$left_sha/clickhouse_build_check/performance/performance.tgz" -O- | tar -C left --strip-components=1 -zxv &
wget -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$right_pr/$right_sha/performance/performance.tgz" -O- | tar -C right --strip-components=1 -zxv & wget -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$right_pr/$right_sha/clickhouse_build_check/performance/performance.tgz" -O- | tar -C right --strip-components=1 -zxv &
else else
mkdir right ||: mkdir right ||:
wget -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$left_pr/$left_sha/performance/performance.tgz" -O- | tar -C left --strip-components=1 -zxv && cp -a left/* right & wget -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$left_pr/$left_sha/clickhouse_build_check/performance/performance.tgz" -O- | tar -C left --strip-components=1 -zxv && cp -a left/* right &
fi fi
for dataset_name in $datasets for dataset_name in $datasets

View File

@ -50,7 +50,7 @@ function find_reference_sha
# FIXME sometimes we have testing tags on commits without published builds -- # FIXME sometimes we have testing tags on commits without published builds --
# normally these are documentation commits. Loop to skip them. # normally these are documentation commits. Loop to skip them.
if curl --fail --head "https://clickhouse-builds.s3.yandex.net/0/$REF_SHA/performance/performance.tgz" if curl --fail --head "https://clickhouse-builds.s3.yandex.net/0/$REF_SHA/clickhouse_build_check/performance/performance.tgz"
then then
break break
fi fi
@ -133,6 +133,6 @@ dmesg -T > dmesg.log
7z a '-x!*/tmp' /output/output.7z ./*.{log,tsv,html,txt,rep,svg,columns} \ 7z a '-x!*/tmp' /output/output.7z ./*.{log,tsv,html,txt,rep,svg,columns} \
{right,left}/{performance,scripts} {{right,left}/db,db0}/preprocessed_configs \ {right,left}/{performance,scripts} {{right,left}/db,db0}/preprocessed_configs \
report analyze benchmark report analyze benchmark metrics
cp compare.log /output cp compare.log /output

View File

@ -14,22 +14,15 @@ import traceback
def tsv_escape(s): def tsv_escape(s):
return s.replace('\\', '\\\\').replace('\t', '\\t').replace('\n', '\\n').replace('\r','') return s.replace('\\', '\\\\').replace('\t', '\\t').replace('\n', '\\n').replace('\r','')
stage_start_seconds = time.perf_counter()
def report_stage_end(stage_name):
global stage_start_seconds
print('{}\t{}'.format(stage_name, time.perf_counter() - stage_start_seconds))
stage_start_seconds = time.perf_counter()
report_stage_end('start')
parser = argparse.ArgumentParser(description='Run performance test.') parser = argparse.ArgumentParser(description='Run performance test.')
# Explicitly decode files as UTF-8 because sometimes we have Russian characters in queries, and LANG=C is set. # Explicitly decode files as UTF-8 because sometimes we have Russian characters in queries, and LANG=C is set.
parser.add_argument('file', metavar='FILE', type=argparse.FileType('r', encoding='utf-8'), nargs=1, help='test description file') parser.add_argument('file', metavar='FILE', type=argparse.FileType('r', encoding='utf-8'), nargs=1, help='test description file')
parser.add_argument('--host', nargs='*', default=['localhost'], help="Server hostname(s). Corresponds to '--port' options.") parser.add_argument('--host', nargs='*', default=['localhost'], help="Server hostname(s). Corresponds to '--port' options.")
parser.add_argument('--port', nargs='*', default=[9000], help="Server port(s). Corresponds to '--host' options.") parser.add_argument('--port', nargs='*', default=[9000], help="Server port(s). Corresponds to '--host' options.")
parser.add_argument('--runs', type=int, default=int(os.environ.get('CHPC_RUNS', 13)), help='Number of query runs per server. Defaults to CHPC_RUNS environment variable.') parser.add_argument('--runs', type=int, default=int(os.environ.get('CHPC_RUNS', 13)), help='Number of query runs per server. Defaults to CHPC_RUNS environment variable.')
parser.add_argument('--no-long', type=bool, default=True, help='Skip the tests tagged as long.') parser.add_argument('--long', action='store_true', help='Do not skip the tests tagged as long.')
parser.add_argument('--print-queries', action='store_true', help='Print test queries and exit.')
parser.add_argument('--print-settings', action='store_true', help='Print test settings and exit.')
args = parser.parse_args() args = parser.parse_args()
test_name = os.path.splitext(os.path.basename(args.file[0].name))[0] test_name = os.path.splitext(os.path.basename(args.file[0].name))[0]
@ -37,35 +30,6 @@ test_name = os.path.splitext(os.path.basename(args.file[0].name))[0]
tree = et.parse(args.file[0]) tree = et.parse(args.file[0])
root = tree.getroot() root = tree.getroot()
# Skip long tests
for tag in root.findall('.//tag'):
if tag.text == 'long':
print('skipped\tTest is tagged as long.')
sys.exit(0)
# Check main metric
main_metric_element = root.find('main_metric/*')
if main_metric_element is not None and main_metric_element.tag != 'min_time':
raise Exception('Only the min_time main metric is supported. This test uses \'{}\''.format(main_metric_element.tag))
# FIXME another way to detect infinite tests. They should have an appropriate main_metric but sometimes they don't.
infinite_sign = root.find('.//average_speed_not_changing_for_ms')
if infinite_sign is not None:
raise Exception('Looks like the test is infinite (sign 1)')
# Print report threshold for the test if it is set.
if 'max_ignored_relative_change' in root.attrib:
print(f'report-threshold\t{root.attrib["max_ignored_relative_change"]}')
# Open connections
servers = [{'host': host, 'port': port} for (host, port) in zip(args.host, args.port)]
connections = [clickhouse_driver.Client(**server) for server in servers]
for s in servers:
print('server\t{}\t{}'.format(s['host'], s['port']))
report_stage_end('connect')
# Process query parameters # Process query parameters
subst_elems = root.findall('substitutions/substitution') subst_elems = root.findall('substitutions/substitution')
available_parameters = {} # { 'table': ['hits_10m', 'hits_100m'], ... } available_parameters = {} # { 'table': ['hits_10m', 'hits_100m'], ... }
@ -93,68 +57,6 @@ def substitute_parameters(query_templates, other_templates = []):
else: else:
return query_results return query_results
report_stage_end('substitute')
# Run drop queries, ignoring errors. Do this before all other activity, because
# clickhouse_driver disconnects on error (this is not configurable), and the new
# connection loses the changes in settings.
drop_query_templates = [q.text for q in root.findall('drop_query')]
drop_queries = substitute_parameters(drop_query_templates)
for c in connections:
for q in drop_queries:
try:
c.execute(q)
except:
pass
report_stage_end('drop1')
# Apply settings.
# If there are errors, report them and continue -- maybe a new test uses a setting
# that is not in master, but the queries can still run. If we have multiple
# settings and one of them throws an exception, all previous settings for this
# connection will be reset, because the driver reconnects on error (not
# configurable). So the end result is uncertain, but hopefully we'll be able to
# run at least some queries.
settings = root.findall('settings/*')
for c in connections:
for s in settings:
try:
c.execute("set {} = '{}'".format(s.tag, s.text))
except:
print(traceback.format_exc(), file=sys.stderr)
report_stage_end('settings')
# Check tables that should exist. If they don't exist, just skip this test.
tables = [e.text for e in root.findall('preconditions/table_exists')]
for t in tables:
for c in connections:
try:
res = c.execute("select 1 from {} limit 1".format(t))
except:
exception_message = traceback.format_exception_only(*sys.exc_info()[:2])[-1]
skipped_message = ' '.join(exception_message.split('\n')[:2])
print(f'skipped\t{tsv_escape(skipped_message)}')
sys.exit(0)
report_stage_end('preconditions')
# Run create queries
create_query_templates = [q.text for q in root.findall('create_query')]
create_queries = substitute_parameters(create_query_templates)
for c in connections:
for q in create_queries:
c.execute(q)
# Run fill queries
fill_query_templates = [q.text for q in root.findall('fill_query')]
fill_queries = substitute_parameters(fill_query_templates)
for c in connections:
for q in fill_queries:
c.execute(q)
report_stage_end('fill')
# Build a list of test queries, substituting parameters to query templates, # Build a list of test queries, substituting parameters to query templates,
# and reporting the queries marked as short. # and reporting the queries marked as short.
@ -171,7 +73,104 @@ for e in root.findall('query'):
test_queries += new_queries test_queries += new_queries
report_stage_end('substitute2')
# If we're only asked to print the queries, do that and exit
if args.print_queries:
for q in test_queries:
print(q)
exit(0)
# If we're only asked to print the settings, do that and exit. These are settings
# for clickhouse-benchmark, so we print them as command line arguments, e.g.
# '--max_memory_usage=10000000'.
if args.print_settings:
for s in root.findall('settings/*'):
print(f'--{s.tag}={s.text}')
exit(0)
# Skip long tests
if not args.long:
for tag in root.findall('.//tag'):
if tag.text == 'long':
print('skipped\tTest is tagged as long.')
sys.exit(0)
# Check main metric to detect infinite tests. We shouldn't have such tests anymore,
# but we did in the past, and it is convenient to be able to process old tests.
main_metric_element = root.find('main_metric/*')
if main_metric_element is not None and main_metric_element.tag != 'min_time':
raise Exception('Only the min_time main metric is supported. This test uses \'{}\''.format(main_metric_element.tag))
# Another way to detect infinite tests. They should have an appropriate main_metric
# but sometimes they don't.
infinite_sign = root.find('.//average_speed_not_changing_for_ms')
if infinite_sign is not None:
raise Exception('Looks like the test is infinite (sign 1)')
# Print report threshold for the test if it is set.
if 'max_ignored_relative_change' in root.attrib:
print(f'report-threshold\t{root.attrib["max_ignored_relative_change"]}')
# Open connections
servers = [{'host': host, 'port': port} for (host, port) in zip(args.host, args.port)]
connections = [clickhouse_driver.Client(**server) for server in servers]
for s in servers:
print('server\t{}\t{}'.format(s['host'], s['port']))
# Run drop queries, ignoring errors. Do this before all other activity, because
# clickhouse_driver disconnects on error (this is not configurable), and the new
# connection loses the changes in settings.
drop_query_templates = [q.text for q in root.findall('drop_query')]
drop_queries = substitute_parameters(drop_query_templates)
for c in connections:
for q in drop_queries:
try:
c.execute(q)
except:
pass
# Apply settings.
# If there are errors, report them and continue -- maybe a new test uses a setting
# that is not in master, but the queries can still run. If we have multiple
# settings and one of them throws an exception, all previous settings for this
# connection will be reset, because the driver reconnects on error (not
# configurable). So the end result is uncertain, but hopefully we'll be able to
# run at least some queries.
settings = root.findall('settings/*')
for c in connections:
for s in settings:
try:
c.execute("set {} = '{}'".format(s.tag, s.text))
except:
print(traceback.format_exc(), file=sys.stderr)
# Check tables that should exist. If they don't exist, just skip this test.
tables = [e.text for e in root.findall('preconditions/table_exists')]
for t in tables:
for c in connections:
try:
res = c.execute("select 1 from {} limit 1".format(t))
except:
exception_message = traceback.format_exception_only(*sys.exc_info()[:2])[-1]
skipped_message = ' '.join(exception_message.split('\n')[:2])
print(f'skipped\t{tsv_escape(skipped_message)}')
sys.exit(0)
# Run create queries
create_query_templates = [q.text for q in root.findall('create_query')]
create_queries = substitute_parameters(create_query_templates)
for c in connections:
for q in create_queries:
c.execute(q)
# Run fill queries
fill_query_templates = [q.text for q in root.findall('fill_query')]
fill_queries = substitute_parameters(fill_query_templates)
for c in connections:
for q in fill_queries:
c.execute(q)
# Run test queries. # Run test queries.
for query_index, q in enumerate(test_queries): for query_index, q in enumerate(test_queries):
@ -220,13 +219,9 @@ for query_index, q in enumerate(test_queries):
client_seconds = time.perf_counter() - start_seconds client_seconds = time.perf_counter() - start_seconds
print(f'client-time\t{query_index}\t{client_seconds}\t{server_seconds}') print(f'client-time\t{query_index}\t{client_seconds}\t{server_seconds}')
report_stage_end('benchmark')
# Run drop queries # Run drop queries
drop_query_templates = [q.text for q in root.findall('drop_query')] drop_query_templates = [q.text for q in root.findall('drop_query')]
drop_queries = substitute_parameters(drop_query_templates) drop_queries = substitute_parameters(drop_query_templates)
for c in connections: for c in connections:
for q in drop_queries: for q in drop_queries:
c.execute(q) c.execute(q)
report_stage_end('drop2')

View File

@ -5,7 +5,9 @@ import ast
import collections import collections
import csv import csv
import itertools import itertools
import json
import os import os
import pprint
import sys import sys
import traceback import traceback
@ -101,7 +103,7 @@ def tableRow(cell_values, cell_attributes = []):
for v, a in itertools.zip_longest( for v, a in itertools.zip_longest(
cell_values, cell_attributes, cell_values, cell_attributes,
fillvalue = '') fillvalue = '')
if a is not None])) if a is not None and v is not None]))
def tableHeader(r): def tableHeader(r):
return tr(''.join([th(f) for f in r])) return tr(''.join([th(f) for f in r]))
@ -189,10 +191,9 @@ if args.report == 'main':
slow_on_client_rows = tsvRows('report/slow-on-client.tsv') slow_on_client_rows = tsvRows('report/slow-on-client.tsv')
error_tests += len(slow_on_client_rows) error_tests += len(slow_on_client_rows)
printSimpleTable('Slow on client', printSimpleTable('Slow on client',
['Client time, s', 'Server time, s', 'Ratio', 'Test', 'Query'], ['Client time,&nbsp;s', 'Server time,&nbsp;s', 'Ratio', 'Test', 'Query'],
slow_on_client_rows) slow_on_client_rows)
unmarked_short_rows = tsvRows('report/unmarked-short-queries.tsv') unmarked_short_rows = tsvRows('report/unmarked-short-queries.tsv')
error_tests += len(unmarked_short_rows) error_tests += len(unmarked_short_rows)
printSimpleTable('Short queries not marked as short', printSimpleTable('Short queries not marked as short',
@ -208,8 +209,8 @@ if args.report == 'main':
print(tableStart('Changes in performance')) print(tableStart('Changes in performance'))
columns = [ columns = [
'Old, s', # 0 'Old,&nbsp;s', # 0
'New, s', # 1 'New,&nbsp;s', # 1
'Relative difference (new&nbsp;&minus;&nbsp;old) / old', # 2 'Relative difference (new&nbsp;&minus;&nbsp;old) / old', # 2
'p&nbsp;<&nbsp;0.001 threshold', # 3 'p&nbsp;<&nbsp;0.001 threshold', # 3
# Failed # 4 # Failed # 4
@ -250,8 +251,8 @@ if args.report == 'main':
unstable_queries += len(unstable_rows) unstable_queries += len(unstable_rows)
columns = [ columns = [
'Old, s', #0 'Old,&nbsp;s', #0
'New, s', #1 'New,&nbsp;s', #1
'Relative difference (new&nbsp;-&nbsp;old)/old', #2 'Relative difference (new&nbsp;-&nbsp;old)/old', #2
'p&nbsp;<&nbsp;0.001 threshold', #3 'p&nbsp;<&nbsp;0.001 threshold', #3
# Failed #4 # Failed #4
@ -293,13 +294,13 @@ if args.report == 'main':
columns = [ columns = [
'Test', #0 'Test', #0
'Wall clock time, s', #1 'Wall clock time,&nbsp;s', #1
'Total client time, s', #2 'Total client time,&nbsp;s', #2
'Total queries', #3 'Total queries', #3
'Ignored short queries', #4 'Ignored short queries', #4
'Longest query<br>(sum for all runs), s', #5 'Longest query<br>(sum for all runs),&nbsp;s', #5
'Avg wall clock time<br>(sum for all runs), s', #6 'Avg wall clock time<br>(sum for all runs),&nbsp;s', #6
'Shortest query<br>(sum for all runs), s', #7 'Shortest query<br>(sum for all runs),&nbsp;s', #7
] ]
print(tableStart('Test times')) print(tableStart('Test times'))
@ -328,6 +329,72 @@ if args.report == 'main':
print_test_times() print_test_times()
def print_benchmark_results():
json_reports = [json.load(open(f'benchmark/website-{x}.json')) for x in ['left', 'right']]
stats = [next(iter(x.values()))["statistics"] for x in json_reports]
qps = [x["QPS"] for x in stats]
queries = [x["num_queries"] for x in stats]
errors = [x["num_errors"] for x in stats]
relative_diff = (qps[1] - qps[0]) / max(0.01, qps[0]);
times_diff = max(qps) / max(0.01, min(qps))
all_rows = []
header = ['Benchmark', 'Metric', 'Old', 'New', 'Relative difference', 'Times difference'];
attrs = ['' for x in header]
row = ['website', 'queries', f'{queries[0]:d}', f'{queries[1]:d}', '--', '--']
attrs[0] = 'rowspan=2'
all_rows.append([row, attrs])
attrs = ['' for x in header]
row = [None, 'queries/s', f'{qps[0]:.3f}', f'{qps[1]:.3f}', f'{relative_diff:.3f}', f'x{times_diff:.3f}']
if abs(relative_diff) > 0.1:
# More queries per second is better.
if relative_diff > 0.:
attrs[4] = f'style="background: {color_good}"'
else:
attrs[4] = f'style="background: {color_bad}"'
else:
attrs[4] = ''
all_rows.append([row, attrs]);
if max(errors):
all_rows[0][1][0] = "rowspan=3"
row = [''] * (len(header))
attrs = ['' for x in header]
attrs[0] = None
row[1] = 'errors'
row[2] = f'{errors[0]:d}'
row[3] = f'{errors[1]:d}'
row[4] = '--'
row[5] = '--'
if errors[0]:
attrs[2] += f' style="background: {color_bad}" '
if errors[1]:
attrs[3] += f' style="background: {color_bad}" '
all_rows.append([row, attrs])
print(tableStart('Concurrent benchmarks'))
print(tableHeader(header))
for row, attrs in all_rows:
print(tableRow(row, attrs))
print(tableEnd())
try:
print_benchmark_results()
except:
report_errors.append(
traceback.format_exception_only(
*sys.exc_info()[:2])[-1])
pass
printSimpleTable('Metric changes',
['Metric', 'Old median value', 'New median value',
'Relative difference', 'Times difference'],
tsvRows('metrics/changes.tsv'))
print_report_errors() print_report_errors()
print(""" print("""
@ -394,8 +461,8 @@ elif args.report == 'all-queries':
columns = [ columns = [
# Changed #0 # Changed #0
# Unstable #1 # Unstable #1
'Old, s', #2 'Old,&nbsp;s', #2
'New, s', #3 'New,&nbsp;s', #3
'Relative difference (new&nbsp;&minus;&nbsp;old) / old', #4 'Relative difference (new&nbsp;&minus;&nbsp;old) / old', #4
'Times speedup / slowdown', #5 'Times speedup / slowdown', #5
'p&nbsp;<&nbsp;0.001 threshold', #6 'p&nbsp;<&nbsp;0.001 threshold', #6

View File

@ -20,7 +20,7 @@ RUN apt-get --allow-unauthenticated update -y \
# apt-get --allow-unauthenticated install --yes --no-install-recommends \ # apt-get --allow-unauthenticated install --yes --no-install-recommends \
# pvs-studio # pvs-studio
ENV PKG_VERSION="pvs-studio-7.07.38234.48-amd64.deb" ENV PKG_VERSION="pvs-studio-7.08.39365.50-amd64.deb"
RUN wget "https://files.viva64.com/$PKG_VERSION" RUN wget "https://files.viva64.com/$PKG_VERSION"
RUN sudo dpkg -i "$PKG_VERSION" RUN sudo dpkg -i "$PKG_VERSION"

View File

@ -19,7 +19,7 @@ The table engine (type of table) determines:
### MergeTree {#mergetree} ### MergeTree {#mergetree}
The most universal and functional table engines for high-load tasks. The property shared by these engines is quick data insertion with subsequent background data processing. `MergeTree` family engines support data replication (with [Replicated\*](../../engines/table-engines/mergetree-family/replication.md#table_engines-replication) versions of engines), partitioning, and other features not supported in other engines. The most universal and functional table engines for high-load tasks. The property shared by these engines is quick data insertion with subsequent background data processing. `MergeTree` family engines support data replication (with [Replicated\*](../../engines/table-engines/mergetree-family/replication.md#table_engines-replication) versions of engines), partitioning, secondary data-skipping indexes, and other features not supported in other engines.
Engines in the family: Engines in the family:
@ -80,4 +80,4 @@ To select data from a virtual column, you must specify its name in the `SELECT`
If you create a table with a column that has the same name as one of the table virtual columns, the virtual column becomes inaccessible. We dont recommend doing this. To help avoid conflicts, virtual column names are usually prefixed with an underscore. If you create a table with a column that has the same name as one of the table virtual columns, the virtual column becomes inaccessible. We dont recommend doing this. To help avoid conflicts, virtual column names are usually prefixed with an underscore.
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/) <!--hide--> [Original article](https://clickhouse.tech/docs/en/engines/table-engines/) <!--hide-->

View File

@ -0,0 +1,25 @@
---
title: What is a columnar database?
toc_hidden: true
toc_priority: 101
---
# What Is a Columnar Database? {#what-is-a-columnar-database}
A columnar database stores data of each column independently. This allows to read data from disks only for those columns that are used in any given query. The cost is that operations that affect whole rows become proportionally more expensive. The synonym for a columnar database is a column-oriented database management system. ClickHouse is a typical example of such a system.
Key columnar database advantages are:
- Queries that use only a few columns out of many.
- Aggregating queries against large volumes of data.
- Column-wise data compression.
Here is the illustration of the difference between traditional row-oriented systems and columnar databases when building reports:
**Traditional row-oriented**
![Traditional row-oriented](https://clickhouse.tech/docs/en/images/row-oriented.gif#)
**Columnar**
![Columnar](https://clickhouse.tech/docs/en/images/column-oriented.gif#)
A columnar database is a preferred choice for analytical applications because it allows to have many columns in a table just in case, but dont pay the cost for unused columns on read query execution time. Column-oriented databases are designed for big data processing because and data warehousing, they often natively scale using distributed clusters of low-cost hardware to increase throughput. ClickHouse does it with combination of [distributed](../../engines/table-engines/special/distributed.md) and [replicated](../../engines/table-engines/mergetree-family/replication.md) tables.

View File

@ -1,11 +1,17 @@
--- ---
title: "What does \u201CClickHouse\u201D mean?"
toc_hidden: true toc_hidden: true
toc_priority: 10 toc_priority: 10
--- ---
# What Does “ClickHouse” Mean? {#what-does-clickhouse-mean} # What Does “ClickHouse” Mean? {#what-does-clickhouse-mean}
Its a combination of “**Click**stream” and “Data ware**house**”. It comes from the original use case at Yandex.Metrica, where ClickHouse was supposed to keep records of all clicks by people from all over the Internet and it still does the job. You can read more about this use case on [ClickHouse history](../../introduction/history.md) page. Its a combination of “**Click**stream” and “Data ware**House**”. It comes from the original use case at Yandex.Metrica, where ClickHouse was supposed to keep records of all clicks by people from all over the Internet and it still does the job. You can read more about this use case on [ClickHouse history](../../introduction/history.md) page.
This two-part meaning has two consequences:
- The only correct way to write Click**H**ouse is with capital H.
- If you need to abbreviate it, use **CH**. For some historical reasons, abbreviating as CK is also popular in China, mostly because one of the first talks about ClickHouse in Chinese used this form.
!!! info "Fun fact" !!! info "Fun fact"
Many years after ClickHouse got its name, this approach of combining two words that are meaningful on their own has been highlighted as the best way to name a database in a [research by Andy Pavlo](https://www.cs.cmu.edu/~pavlo/blog/2020/03/on-naming-a-database-management-system.html), an Associate Professor of Databases at Carnegie Mellon University. ClickHouse shared his “best database name of all time” award with Postgres. Many years after ClickHouse got its name, this approach of combining two words that are meaningful on their own has been highlighted as the best way to name a database in a [research by Andy Pavlo](https://www.cs.cmu.edu/~pavlo/blog/2020/03/on-naming-a-database-management-system.html), an Associate Professor of Databases at Carnegie Mellon University. ClickHouse shared his “best database name of all time” award with Postgres.

View File

@ -1,4 +1,5 @@
--- ---
title: General questions about ClickHouse
toc_hidden_folder: true toc_hidden_folder: true
toc_priority: 1 toc_priority: 1
toc_title: General toc_title: General
@ -8,8 +9,13 @@ toc_title: General
Questions: Questions:
- [What is ClickHouse?](../../index.md#what-is-clickhouse)
- [Why ClickHouse is so fast?](../../faq/general/why-clickhouse-is-so-fast.md)
- [Who is using ClickHouse?](../../faq/general/who-is-using-clickhouse.md)
- [What does “ClickHouse” mean?](../../faq/general/dbms-naming.md) - [What does “ClickHouse” mean?](../../faq/general/dbms-naming.md)
- [What does “Не тормозит” mean?](../../faq/general/ne-tormozit.md) - [What does “Не тормозит” mean?](../../faq/general/ne-tormozit.md)
- [What is OLAP?](../../faq/general/olap.md)
- [What is a columnar database?](../../faq/general/columnar-database.md)
- [Why not use something like MapReduce?](../../faq/general/mapreduce.md) - [Why not use something like MapReduce?](../../faq/general/mapreduce.md)
!!! info "Dont see what you were looking for?" !!! info "Dont see what you were looking for?"

View File

@ -1,6 +1,7 @@
--- ---
title: Why not use something like MapReduce?
toc_hidden: true toc_hidden: true
toc_priority: 20 toc_priority: 110
--- ---
# Why Not Use Something Like MapReduce? {#why-not-use-something-like-mapreduce} # Why Not Use Something Like MapReduce? {#why-not-use-something-like-mapreduce}

View File

@ -1,13 +1,15 @@
--- ---
title: "What does \u201C\u043D\u0435 \u0442\u043E\u0440\u043C\u043E\u0437\u0438\u0442\
\u201D mean?"
toc_hidden: true toc_hidden: true
toc_priority: 11 toc_priority: 11
--- ---
# What Does “Не тормозит” mean? {#what-does-ne-tormozit-mean} # What Does “Не тормозит” Mean? {#what-does-ne-tormozit-mean}
This question usually arises when people see official ClickHouse t-shirts. They have large words **“ClickHouse не тормозит”** on the front. This question usually arises when people see official ClickHouse t-shirts. They have large words **“ClickHouse не тормозит”** on the front.
Before ClickHouse became open-source, it has been developed as an in-house storage system by the largest Russian IT company, [Yandex](https://yandex.com/company/). Thats why it initially got its slogan in Russian, which is “не тормозит”. After the open-source release we first produced some of those t-shirts for events in Russia and it was a no-brainer to use the slogan as-is. Before ClickHouse became open-source, it has been developed as an in-house storage system by the largest Russian IT company, [Yandex](https://yandex.com/company/). Thats why it initially got its slogan in Russian, which is “не тормозит” (pronounced as “ne tormozit”). After the open-source release we first produced some of those t-shirts for events in Russia and it was a no-brainer to use the slogan as-is.
One of the following batches of those t-shirts was supposed to be given away on events outside of Russia and we tried to make the English version of the slogan. Unfortunately, the Russian language is kind of elegant in terms of expressing stuff and there was a restriction of limited space on a t-shirt, so we failed to come up with good enough translation (most options appeared to be either long or inaccurate) and decided to keep the slogan in Russian even on t-shirts produced for international events. It appeared to be a great decision because people all over the world get positively surprised and curious when they see it. One of the following batches of those t-shirts was supposed to be given away on events outside of Russia and we tried to make the English version of the slogan. Unfortunately, the Russian language is kind of elegant in terms of expressing stuff and there was a restriction of limited space on a t-shirt, so we failed to come up with good enough translation (most options appeared to be either long or inaccurate) and decided to keep the slogan in Russian even on t-shirts produced for international events. It appeared to be a great decision because people all over the world get positively surprised and curious when they see it.

View File

@ -0,0 +1,39 @@
---
title: What is OLAP?
toc_hidden: true
toc_priority: 100
---
# What Is OLAP? {#what-is-olap}
[OLAP](https://en.wikipedia.org/wiki/Online_analytical_processing) stands for Online Analytical Processing. It is a broad term that can be looked at from two perspectives: technical and business. But at the very high level, you can just read these words backward:
Processing
: Some source data is processed…
Analytical
: …to produce some analytical reports and insights…
Online
: …in real-time.
## OLAP from the Business Perspective {#olap-from-the-business-perspective}
In recent years, business people started to realize the value of data. Companies who make their decisions blindly, more often than not fail to keep up with the competition. The data-driven approach of successful companies forces them to collect all data that might be remotely useful for making business decisions and need mechanisms to timely analyze them. Heres where OLAP database management systems (DBMS) come in.
In a business sense, OLAP allows companies to continuously plan, analyze, and report operational activities, thus maximizing efficiency, reducing expenses, and ultimately conquering the market share. It could be done either in an in-house system or outsourced to SaaS providers like web/mobile analytics services, CRM services, etc. OLAP is the technology behind many BI applications (Business Intelligence).
ClickHouse is an OLAP database management system that is pretty often used as a backend for those SaaS solutions for analyzing domain-specific data. However, some businesses are still reluctant to share their data with third-party providers and an in-house data warehouse scenario is also viable.
## OLAP from the Technical Perspective {#olap-from-the-technical-perspective}
All database management systems could be classified into two groups: OLAP (Online **Analytical** Processing) and OLTP (Online **Transactional** Processing). Former focuses on building reports, each based on large volumes of historical data, but doing it not so frequently. While the latter usually handle a continuous stream of transactions, constantly modifying the current state of data.
In practice OLAP and OLTP are not categories, its more like a spectrum. Most real systems usually focus on one of them but provide some solutions or workarounds if the opposite kind of workload is also desired. This situation often forces businesses to operate multiple storage systems integrated, which might be not so big deal but having more systems make it more expensive to maintain. So the trend of recent years is HTAP (**Hybrid Transactional/Analytical Processing**) when both kinds of the workload are handled equally well by a single database management system.
Even if a DBMS started as a pure OLAP or pure OLTP, they are forced to move towards that HTAP direction to keep up with their competition. And ClickHouse is no exception, initially, it has been designed as [fast-as-possible OLAP system](../../faq/general/why-clickhouse-is-so-fast.md) and it still doesnt have full-fledged transaction support, but some features like consistent read/writes and mutations for updating/deleting data had to be added.
The fundamental trade-off between OLAP and OLTP systems remains:
- To build analytical reports efficiently its crucial to be able to read columns separately, thus most OLAP databases are [columnar](../../faq/general/columnar-database.md),
- While storing columns separately increases costs of operations on rows, like append or in-place modification, proportionally to the number of columns (which can be huge if the systems try to collect all details of an event just in case). Thus, most OLTP systems store data arranged by rows.

View File

@ -0,0 +1,19 @@
---
title: Who is using ClickHouse?
toc_hidden: true
toc_priority: 9
---
# Who Is Using ClickHouse? {#who-is-using-clickhouse}
Being an open-source product makes this question not so straightforward to answer. You dont have to tell anyone if you want to start using ClickHouse, you just go grab source code or pre-compiled packages. Theres no contract to sign and the [Apache 2.0 license](https://github.com/ClickHouse/ClickHouse/blob/master/LICENSE) allows for unconstrained software distribution.
Also, the technology stack is often in a grey zone of whats covered by an NDA. Some companies consider technologies they use as a competitive advantage even if they are open-source and dont allow employees to share any details publicly. Some see some PR risks and allow employees to share implementation details only with their PR department approval.
So how to tell who is using ClickHouse?
One way is to **ask around**. If its not in writing, people are much more willing to share what technologies are used in their companies, what the use cases are, what kind of hardware is used, data volumes, etc. Were talking with users regularly on [ClickHouse Meetups](https://www.youtube.com/channel/UChtmrD-dsdpspr42P_PyRAw/playlists) all over the world and have heard stories about 1000+ companies that use ClickHouse. Unfortunately, thats not reproducible and we try to treat such stories as if they were told under NDA to avoid any potential troubles. But you can come to any of our future meetups and talk with other users on your own. There are multiple ways how meetups are announced, for example, you can subscribe to [our Twitter](http://twitter.com/ClickHouseDB/).
The second way is to look for companies **publicly saying** that they use ClickHouse. Its more substantial because theres usually some hard evidence like a blog post, talk video recording, slide deck, etc. We collect the collection of links to such evidence on our **[Adopters](../../introduction/adopters.md)** page. Feel free to contribute the story of your employer or just some links youve stumbled upon (but try not to violate your NDA in the process).
You can find names of very large companies in the adopters list, like Bloomberg, Cisco, China Telecom, Tencent, or Uber, but with the first approach, we found that there are many more. For example, if you take [the list of largest IT companies by Forbes (2020)](https://www.forbes.com/sites/hanktucker/2020/05/13/worlds-largest-technology-companies-2020-apple-stays-on-top-zoom-and-uber-debut/) over half of them are using ClickHouse in some way. Also, it would be unfair not to mention [Yandex](../../introduction/history.md), the company which initially open-sourced ClickHouse in 2016 and happens to be one of the largest IT companies in Europe.

View File

@ -0,0 +1,63 @@
---
title: Why ClickHouse is so fast?
toc_hidden: true
toc_priority: 8
---
# Why ClickHouse Is So Fast? {#why-clickhouse-is-so-fast}
It was designed to be fast. Query execution performance has always been a top priority during the development process, but other important characteristics like user-friendliness, scalability, and security were also considered so ClickHouse could become a real production system.
ClickHouse was initially built as a prototype to do just a single task well: to filter and aggregate data as fast as possible. Thats what needs to be done to build a typical analytical report and thats what a typical [GROUP BY](../../sql-reference/statements/select/group-by.md) query does. ClickHouse team has made several high-level decisions that combined made achieving this task possible:
Column-oriented storage
: Source data often contain hundreds or even thousands of columns, while a report can use just a few of them. The system needs to avoid reading unnecessary columns, or most expensive disk read operations would be wasted.
Indexes
: ClickHouse keeps data structures in memory that allows reading not only used columns but only necessary row ranges of those columns.
Data compression
: Storing different values of the same column together often leads to better compression ratios (compared to row-oriented systems) because in real data column often has the same or not so many different values for neighboring rows. In addition to general-purpose compression, ClickHouse supports [specialized codecs](../../sql-reference/statements/create.md#create-query-specialized-codecs) that can make data even more compact.
Vectorized query execution
: ClickHouse not only stores data in columns but also processes data in columns. It leads to better CPU cache utilization and allows for [SIMD](https://en.wikipedia.org/wiki/SIMD) CPU instructions usage.
Scalability
: ClickHouse can leverage all available CPU cores and disks to execute even a single query. Not only on a single server but all CPU cores and disks of a cluster as well.
But many other database management systems use similar techniques. What really makes ClickHouse stand out is **attention to low-level details**. Most programming languages provide implementations for most common algorithms and data structures, but they tend to be too generic to be effective. Every task can be considered as a landscape with various characteristics, instead of just throwing in random implementation. For example, if you need a hash table, here are some key questions to consider:
- Which hash function to choose?
- Collision resolution algorithm: [open addressing](https://en.wikipedia.org/wiki/Open_addressing) vs [chaining](https://en.wikipedia.org/wiki/Hash_table#Separate_chaining)?
- Memory layout: one array for keys and values or separate arrays? Will it store small or large values?
- Fill factor: when and how to resize? How to move values around on resize?
- Will values be removed and which algorithm will work better if they will?
- Will we need fast probing with bitmaps, inline placement of string keys, support for non-movable values, prefetch, and batching?
Hash table is a key data structure for `GROUP BY` implementation and ClickHouse automatically chooses one of [30+ variations](https://github.com/ClickHouse/ClickHouse/blob/master/src/Interpreters/Aggregator.h) for each specific query.
The same goes for algorithms, for example, in sorting you might consider:
- What will be sorted: an array of numbers, tuples, strings, or structures?
- Is all data available completely in RAM?
- Do we need a stable sort?
- Do we need a full sort? Maybe partial sort or n-th element will suffice?
- How to implement comparisons?
- Are we sorting data that has already been partially sorted?
Algorithms that they rely on characteristics of data they are working with can often do better than their generic counterparts. If it is not really known in advance, the system can try various implementations and choose the one that works best in runtime. For example, see an [article on how LZ4 decompression is implemented in ClickHouse](https://habr.com/en/company/yandex/blog/457612/).
Last but not least, the ClickHouse team always monitors the Internet on people claiming that they came up with the best implementation, algorithm, or data structure to do something and tries it out. Those claims mostly appear to be false, but from time to time youll indeed find a gem.
!!! info "Tips for building your own high-performance software"
- Keep in mind low-level details when designing your system.
- Design based on hardware capabilities.
- Choose data structures and abstractions based on the needs of the task.
- Provide specializations for special cases.
- Try new, “best” algorithms, that you read about yesterday.
- Choose an algorithm in runtime based on statistics.
- Benchmark on real datasets.
- Test for performance regressions in CI.
- Measure and observe everything.

View File

@ -10,8 +10,37 @@ This section of the documentation is a place to collect answers to ClickHouse-re
Categories: Categories:
- [General](../faq/general/index.md) - **[General](../faq/general/index.md)**
- [Operations](../faq/operations/index.md) - [What is ClickHouse?](../index.md#what-is-clickhouse)
- [Integration](../faq/integration/index.md) - [Why ClickHouse is so fast?](../faq/general/why-clickhouse-is-so-fast.md)
- [Who is using ClickHouse?](../faq/general/who-is-using-clickhouse.md)
- [What does “ClickHouse” mean?](../faq/general/dbms-naming.md)
- [What does “Не тормозит” mean?](../faq/general/ne-tormozit.md)
- [What is OLAP?](../faq/general/olap.md)
- [What is a columnar database?](../faq/general/columnar-database.md)
- [Why not use something like MapReduce?](../faq/general/mapreduce.md)
- **[Use Cases](../faq/use-cases/index.md)**
- [Can I use ClickHouse as a time-series database?](../faq/use-cases/time-series.md)
- [Can I use ClickHouse as a key-value storage?](../faq/use-cases/key-value.md)
- **[Operations](../faq/operations/index.md)**
- [Which ClickHouse version to use in production?](../faq/operations/production.md)
- [Is it possible to delete old records from a ClickHouse table?](../faq/operations/delete-old-data.md)
- **[Integration](../faq/integration/index.md)**
- [How do I export data from ClickHouse to a file?](../faq/integration/file-export.md)
- [What if I have a problem with encodings when connecting to Oracle via ODBC?](../faq/integration/oracle-odbc.md)
{## TODO
Question candidates:
- How to choose a primary key?
- How to add a column in ClickHouse?
- Too many parts
- How to filter ClickHouse table by an array column contents?
- How to insert all rows from one table to another of identical structure?
- How to kill a process (query) in ClickHouse?
- How to implement pivot (like in pandas)?
- How to remove the default ClickHouse user through users.d?
- Importing MySQL dump to Clickhouse
- Window function workarounds (row\_number, lag/lead, running diff/sum/average)
##}
{## [Original article](https://clickhouse.tech/docs/en/faq) ##} {## [Original article](https://clickhouse.tech/docs/en/faq) ##}

View File

@ -1,4 +1,5 @@
--- ---
title: How do I export data from ClickHouse to a file?
toc_hidden: true toc_hidden: true
toc_priority: 10 toc_priority: 10
--- ---

View File

@ -1,15 +1,17 @@
--- ---
title: Questions about integrating ClickHouse and other systems
toc_hidden_folder: true toc_hidden_folder: true
toc_priority: 3 toc_priority: 4
toc_title: Integration toc_title: Integration
--- ---
# Question About Integrating ClickHouse and Other Systems {#question-about-integrating-clickhouse-and-other-systems} # Questions About Integrating ClickHouse and Other Systems {#question-about-integrating-clickhouse-and-other-systems}
Questions: Questions:
- [How do I export data from ClickHouse to a file?](../../faq/integration/file-export.md) - [How do I export data from ClickHouse to a file?](../../faq/integration/file-export.md)
- [What if I Have a problem with encodings when connecting to Oracle via ODBC?](../../faq/integration/oracle-odbc.md) - [How to import JSON into ClickHouse?](../../faq/integration/json-import.md)
- [What if I have a problem with encodings when connecting to Oracle via ODBC?](../../faq/integration/oracle-odbc.md)
!!! info "Dont see what you were looking for?" !!! info "Dont see what you were looking for?"
Check out [other F.A.Q. categories](../../faq/index.md) or browse around main documentation articles found in the left sidebar. Check out [other F.A.Q. categories](../../faq/index.md) or browse around main documentation articles found in the left sidebar.

View File

@ -0,0 +1,33 @@
---
title: How to import JSON into ClickHouse?
toc_hidden: true
toc_priority: 11
---
# How to Import JSON Into ClickHouse? {#how-to-import-json-into-clickhouse}
ClickHouse supports a wide range of [data formats for input and output](../../interfaces/formats.md). There are multiple JSON variations among them, but the most commonly used for data ingestion is [JSONEachRow](../../interfaces/formats.md#jsoneachrow). It expects one JSON object per row, each object separated by a newline.
## Examples {#examples}
Using [HTTP interface](../../interfaces/http.md):
``` bash
$ echo '{"foo":"bar"}' | curl 'http://localhost:8123/?query=INSERT%20INTO%20test%20FORMAT%20JSONEachRow' --data-binary @-
```
Using [CLI interface](../../interfaces/cli.md):
``` bash
$ echo '{"foo":"bar"}' | clickhouse-client ---query="INSERT INTO test FORMAT 20JSONEachRow"
```
Instead of inserting data manually, you might consider to use one of [client libraries](../../interfaces/index.md) instead.
## Useful Settings {#useful-settings}
- `input_format_skip_unknown_fields` allows to insert JSON even if there were additional fields not present in table schema (by discarding them).
- `input_format_import_nested_json` allows to insert nested JSON objects into columns of [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) type.
!!! note "Note"
Settings are specified as `GET` parameters for the HTTP interface or as additional command-line arguments prefixed with `--` for the CLI interface.

View File

@ -1,4 +1,5 @@
--- ---
title: What if I have a problem with encodings when using Oracle via ODBC?
toc_hidden: true toc_hidden: true
toc_priority: 20 toc_priority: 20
--- ---

View File

@ -0,0 +1,42 @@
---
title: Is it possible to delete old records from a ClickHouse table?
toc_hidden: true
toc_priority: 20
---
# Is It Possible to Delete Old Records from a ClickHouse Table? {#is-it-possible-to-delete-old-records-from-a-clickhouse-table}
The short answer is “yes”. ClickHouse has multiple mechanisms that allow freeing up disk space by removing old data. Each mechanism is aimed for different scenarios.
## TTL {#ttl}
ClickHouse allows to automatically drop values when some condition happens. This condition is configured as an expression based on any columns, usually just static offset for any timestamp column.
The key advantage of this approach is that it doesnt need any external system to trigger, once TTL is configured, data removal happens automatically in background.
!!! note "Note"
TTL can also be used to move data not only to [/dev/null](https://en.wikipedia.org/wiki/Null_device), but also between different storage systems, like from SSD to HDD.
More details on [configuring TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).
## ALTER DELETE {#alter-delete}
ClickHouse doesnt have real-time point deletes like in [OLTP](https://en.wikipedia.org/wiki/Online_transaction_processing) databases. The closest thing to them are mutations. They are issued as `ALTER ... DELETE` or `ALTER ... UPDATE` queries to distinguish from normal `DELETE` or `UPDATE` as they are asynchronous batch operations, not immediate modifications. The rest of syntax after `ALTER TABLE` prefix is similar.
`ALTER DELETE` can be issued to flexibly remove old data. If you need to do it regularly, the main downside will be the need to have an external system to submit the query. There are also some performance considerations since mutation rewrite complete parts even theres only a single row to be deleted.
This is the most common approach to make your system based on ClickHouse [GDPR](https://gdpr-info.eu)-compliant.
More details on [mutations](../../sql-reference/statements/alter.md#alter-mutations).
## DROP PARTITION {#drop-partition}
`ALTER TABLE ... DROP PARTITION` provides a cost-efficient way to drop a whole partition. Its not that flexible and needs proper partitioning scheme configured on table creation, but still covers most common cases. Like mutations need to be executed from an external system for regular use.
More details on [manipulating partitions](../../sql-reference/statements/alter.md#alter_drop-partition).
## TRUNCATE {#truncate}
Its rather radical to drop all data from a table, but in some cases it might be exactly what you need.
More details on [table truncation](../../sql-reference/statements/alter.md#alter_drop-partition).

View File

@ -1,6 +1,7 @@
--- ---
title: Question about operating ClickHouse servers and clusters
toc_hidden_folder: true toc_hidden_folder: true
toc_priority: 2 toc_priority: 3
toc_title: Operations toc_title: Operations
--- ---
@ -9,6 +10,7 @@ toc_title: Operations
Questions: Questions:
- [Which ClickHouse version to use in production?](../../faq/operations/production.md) - [Which ClickHouse version to use in production?](../../faq/operations/production.md)
- [Is it possible to delete old records from a ClickHouse table?](../../faq/operations/delete-old-data.md)
!!! info "Dont see what you were looking for?" !!! info "Dont see what you were looking for?"
Check out [other F.A.Q. categories](../../faq/index.md) or browse around main documentation articles found in the left sidebar. Check out [other F.A.Q. categories](../../faq/index.md) or browse around main documentation articles found in the left sidebar.

View File

@ -1,4 +1,5 @@
--- ---
title: Which ClickHouse version to use in production?
toc_hidden: true toc_hidden: true
toc_priority: 10 toc_priority: 10
--- ---

View File

@ -0,0 +1,18 @@
---
title: Questions about ClickHouse use cases
toc_hidden_folder: true
toc_priority: 2
toc_title: Use Cases
---
# Questions About ClickHouse Use Cases {#questions-about-clickhouse-use-cases}
Questions:
- [Can I use ClickHouse as a time-series database?](../../faq/use-cases/time-series.md)
- [Can I use ClickHouse as a key-value storage?](../../faq/use-cases/key-value.md)
!!! info "Dont see what you were looking for?"
Check out [other F.A.Q. categories](../../faq/index.md) or browse around main documentation articles found in the left sidebar.
{## [Original article](https://clickhouse.tech/docs/en/faq/use-cases/) ##}

View File

@ -0,0 +1,17 @@
---
title: Can I use ClickHouse as a key-value storage?
toc_hidden: true
toc_priority: 101
---
# Can I Use ClickHouse As a Key-Value Storage? {#can-i-use-clickhouse-as-a-key-value-storage}
The short answer is **“no”**. The key-value workload is among top positions in the list of cases when NOT{.text-danger} to use ClickHouse. Its an [OLAP](../../faq/general/olap.md) system after all, while there are many excellent key-value storage systems out there.
However, there might be situations where it still makes sense to use ClickHouse for key-value-like queries. Usually, its some low-budget products where the main workload is analytical in nature and fits ClickHouse well, but theres also some secondary process that needs a key-value pattern with not so high request throughput and without strict latency requirements. If you had an unlimited budget, you would have installed a secondary key-value database for thus secondary workload, but in reality, theres an additional cost of maintaining one more storage system (monitoring, backups, etc.) which might be desirable to avoid.
If you decide to go against recommendations and run some key-value-like queries against ClickHouse, herere some tips:
- The key reason why point queries are expensive in ClickHouse is its sparse primary index of main [MergeTree table engine family](../../engines/table-engines/mergetree-family/mergetree.md). This index cant point to each specific row of data, instead, it points to each N-th and the system has to scan from the neighboring N-th row to the desired one, reading excessive data along the way. In a key-value scenario, it might be useful to reduce the value of N with the `index_granularity` setting.
- ClickHouse keeps each column in a separate set of files, so to assemble one complete row it needs to go through each of those files. Their count increases linearly with the number of columns, so in the key-value scenario, it might be worth to avoid using many columns and put all your payload in a single `String` column encoded in some serialization format like JSON, Protobuf or whatever makes sense.
- Theres an alternative approach that uses [Join](../../engines/table-engines/special/join.md) table engine instead of normal `MergeTree` tables and [joinGet](../../sql-reference/functions/other-functions.md#joinget) function to retrieve the data. It can provide better query performance but might have some usability and reliability issues. Heres an [usage example](https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/00800_versatile_storage_join.sql#L49-L51).

View File

@ -0,0 +1,15 @@
---
title: Can I use ClickHouse as a time-series database?
toc_hidden: true
toc_priority: 101
---
# Can I Use ClickHouse As a Time-Series Database? {#can-i-use-clickhouse-as-a-time-series-database}
ClickHouse is a generic data storage solution for [OLAP](../../faq/general/olap.md) workloads, while there are many specialized time-series database management systems. Nevertheless, ClickHouses [focus on query execution speed](../../faq/general/why-clickhouse-is-so-fast.md) allows it to outperform specialized systems in many cases. There are many independent benchmarks on this topic out there ([example](https://medium.com/@AltinityDB/clickhouse-for-time-series-scalability-benchmarks-e181132a895b)), so were not going to conduct one here. Instead, lets focus on ClickHouse features that are important to use if thats your use case.
First of all, there are **[specialized codecs](../../sql-reference/statements/create.md#create-query-specialized-codecs)** which make typical time-series. Either common algorithms like `DoubleDelta` and `Gorilla` or specific to ClickHouse like `T64`.
Second, time-series queries often hit only recent data, like one day or one week old. It makes sense to use servers that have both fast nVME/SSD drives and high-capacity HDD drives. ClickHouse [TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) feature allows to configure keeping fresh hot data on fast drives and gradually move it to slower drives as it ages. Rollup or removal of even older data is also possible if your requirements demand it.
Even though its against ClickHouse philosophy of storing and processing raw data, you can use [materialized views](../../sql-reference/statements/create.md#create-view) to fit into even tighter latency or costs requirements.

View File

@ -94,6 +94,18 @@ For production environments, its recommended to use the latest `stable`-versi
To run ClickHouse inside Docker follow the guide on [Docker Hub](https://hub.docker.com/r/yandex/clickhouse-server/). Those images use official `deb` packages inside. To run ClickHouse inside Docker follow the guide on [Docker Hub](https://hub.docker.com/r/yandex/clickhouse-server/). Those images use official `deb` packages inside.
### From Precompiled Binaries for Non-Standard Environments {#from-binaries-non-linux}
For non-Linux operating systems and for AArch64 CPU arhitecture, ClickHouse builds are provided as a cross-compiled binary from the latest commit of the `master` branch (with a few hours delay).
- [macOS](https://builds.clickhouse.tech/master/macos/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/macos/clickhouse' && chmod a+x ./clickhouse`
- [FreeBSD](https://builds.clickhouse.tech/master/freebsd/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/freebsd/clickhouse' && chmod a+x ./clickhouse`
- [AArch64](https://builds.clickhouse.tech/master/aarch64/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/aarch64/clickhouse' && chmod a+x ./clickhouse`
After downloading, you can use the `clickhouse client` to connect to the server, or `clickhouse local` to process local data. To run `clickhouse server`, you have to additionally download [server](https://github.com/ClickHouse/ClickHouse/blob/master/programs/server/config.xml) and [users](https://github.com/ClickHouse/ClickHouse/blob/master/programs/server/users.xml) configuration files from GitHub.
These builds are not recommended for use in production environments because they are less thoroughly tested, but you can do so on your own risk. They also have only a subset of ClickHouse features available.
### From Sources {#from-sources} ### From Sources {#from-sources}
To manually compile ClickHouse, follow the instructions for [Linux](../development/build.md) or [Mac OS X](../development/build-osx.md). To manually compile ClickHouse, follow the instructions for [Linux](../development/build.md) or [Mac OS X](../development/build-osx.md).

View File

@ -307,11 +307,11 @@ Logging settings.
Keys: Keys:
- level Logging level. Acceptable values: `trace`, `debug`, `information`, `warning`, `error`. - `level` Logging level. Acceptable values: `trace`, `debug`, `information`, `warning`, `error`.
- log The log file. Contains all the entries according to `level`. - `log` The log file. Contains all the entries according to `level`.
- errorlog Error log file. - `errorlog` Error log file.
- size Size of the file. Applies to `log`and`errorlog`. Once the file reaches `size`, ClickHouse archives and renames it, and creates a new log file in its place. - `size` Size of the file. Applies to `log`and`errorlog`. Once the file reaches `size`, ClickHouse archives and renames it, and creates a new log file in its place.
- count The number of archived log files that ClickHouse stores. - `count` The number of archived log files that ClickHouse stores.
**Example** **Example**
@ -348,6 +348,30 @@ Keys:
Default value: `LOG_USER` if `address` is specified, `LOG_DAEMON otherwise.` Default value: `LOG_USER` if `address` is specified, `LOG_DAEMON otherwise.`
- format Message format. Possible values: `bsd` and `syslog.` - format Message format. Possible values: `bsd` and `syslog.`
## send_crash_reports {#server_configuration_parameters-logger}
Settings for opt-in sending crash reports to the ClickHouse core developers team via [Sentry](https://sentry.io).
Enabling it, especially in pre-production environments, is greatly appreciated.
The server will need an access to public Internet via IPv4 (at the time of writing IPv6 is not supported by Sentry) for this feature to be functioning properly.
Keys:
- `enabled` Boolean flag to enable the feature. Set to `true` to allow sending crash reports.
- `endpoint` Overrides the Sentry endpoint.
- `anonymize` - Avoid attaching the server hostname to crash report.
- `http_proxy` - Configure HTTP proxy for sending crash reports.
- `debug` - Sets the Sentry client into debug mode.
- `tmp_path` - Filesystem path for temporary crash report state.
**Recommended way to use**
``` xml
<send_crash_reports>
<enabled>true</enabled>
</send_crash_reports>
```
## macros {#macros} ## macros {#macros}
Parameter substitutions for replicated tables. Parameter substitutions for replicated tables.
@ -426,6 +450,18 @@ The value 0 means that you can delete all tables without any restrictions.
<max_table_size_to_drop>0</max_table_size_to_drop> <max_table_size_to_drop>0</max_table_size_to_drop>
``` ```
## max\_thread\_pool\_size {#max-thread-pool-size}
The maximum number of threads in the Global Thread pool.
Default value: 10000.
**Example**
``` xml
<max_thread_pool_size>12000</max_thread_pool_size>
```
## merge\_tree {#server_configuration_parameters-merge_tree} ## merge\_tree {#server_configuration_parameters-merge_tree}
Fine tuning for tables in the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md). Fine tuning for tables in the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md).

View File

@ -1129,6 +1129,18 @@ Possible values:
Default value: 0 Default value: 0
## optimize\_skip\_unused\_shards\_nesting {#optimize-skip-unused-shards-nesting}
Controls [`optimize_skip_unused_shards`](#optimize-skip-unused-shards) (hence still requires [`optimize_skip_unused_shards`](#optimize-skip-unused-shards)) depends on the nesting level of the distributed query (case when you have `Distributed` table that look into another `Distributed` table).
Possible values:
- 0 — Disabled, `optimize_skip_unused_shards` works always.
- 1 — Enables `optimize_skip_unused_shards` only for the first level.
- 2 — Enables `optimize_skip_unused_shards` up to the second level.
Default value: 0
## force\_optimize\_skip\_unused\_shards {#force-optimize-skip-unused-shards} ## force\_optimize\_skip\_unused\_shards {#force-optimize-skip-unused-shards}
Enables or disables query execution if [optimize\_skip\_unused\_shards](#optimize-skip-unused-shards) is enabled and skipping of unused shards is not possible. If the skipping is not possible and the setting is enabled, an exception will be thrown. Enables or disables query execution if [optimize\_skip\_unused\_shards](#optimize-skip-unused-shards) is enabled and skipping of unused shards is not possible. If the skipping is not possible and the setting is enabled, an exception will be thrown.
@ -1141,16 +1153,17 @@ Possible values:
Default value: 0 Default value: 0
## force\_optimize\_skip\_unused\_shards\_no\_nested {#settings-force_optimize_skip_unused_shards_no_nested} ## force\_optimize\_skip\_unused\_shards\_nesting {#settings-force_optimize_skip_unused_shards_nesting}
Reset [`optimize_skip_unused_shards`](#optimize-skip-unused-shards) for nested `Distributed` table Controls [`force_optimize_skip_unused_shards`](#force-optimize-skip-unused-shards) (hence still requires [`force_optimize_skip_unused_shards`](#force-optimize-skip-unused-shards)) depends on the nesting level of the distributed query (case when you have `Distributed` table that look into another `Distributed` table).
Possible values: Possible values:
- 1 — Enabled. - 0 - Disabled, `force_optimize_skip_unused_shards` works always.
- 0 — Disabled. - 1 — Enables `force_optimize_skip_unused_shards` only for the first level.
- 2 — Enables `force_optimize_skip_unused_shards` up to the second level.
Default value: 0. Default value: 0
## optimize\_throw\_if\_noop {#setting-optimize_throw_if_noop} ## optimize\_throw\_if\_noop {#setting-optimize_throw_if_noop}

View File

@ -176,6 +176,54 @@ hasAny(array1, array2)
`SELECT hasAll([[1, 2], [3, 4]], [[1, 2], [1, 2]])` returns `1`. `SELECT hasAll([[1, 2], [3, 4]], [[1, 2], [1, 2]])` returns `1`.
## hasSubstr {#hassubstr}
Checks whether all the elements of array2 appear in array1 in the same exact order. Therefore, the function will return 1, if and only if `array1 = prefix + array2 + suffix`.
``` sql
hasSubstr(array1, array2)
```
In other words, the functions will check whether all the elements of `array2` are contained in `array1` like
the `hasAll` function. In addition, it will check that the elements are observed in the same order in both `array1` and `array2`.
For Example:
- `hasSubstr([1,2,3,4], [2,3])` returns 1. However, `hasSubstr([1,2,3,4], [3,2])` will return `0`.
- `hasSubstr([1,2,3,4], [1,2,3])` returns 1. However, `hasSubstr([1,2,3,4], [1,2,4])` will return `0`.
**Parameters**
- `array1` Array of any type with a set of elements.
- `array2` Array of any type with a set of elements.
**Return values**
- `1`, if `array1` contains `array2`.
- `0`, otherwise.
**Peculiar properties**
- The function will return `1` if `array2` is empty.
- `Null` processed as a value. In other words `hasSubstr([1, 2, NULL, 3, 4], [2,3])` will return `0`. However, `hasSubstr([1, 2, NULL, 3, 4], [2,NULL,3])` will return `1`
- Order of values in both of arrays does matter.
**Examples**
`SELECT hasSubstr([], [])` returns 1.
`SELECT hasSubstr([1, Null], [Null])` returns 1.
`SELECT hasSubstr([1.0, 2, 3, 4], [1, 3])` returns 0.
`SELECT hasSubstr(['a', 'b'], ['a'])` returns 1.
`SELECT hasSubstr(['a', 'b' , 'c'], ['a', 'b'])` returns 1.
`SELECT hasSubstr(['a', 'b' , 'c'], ['a', 'c'])` returns 0.
`SELECT hasSubstr([[1, 2], [3, 4], [5, 6]], [[1, 2], [3, 4]])` returns 1.
## indexOf(arr, x) {#indexofarr-x} ## indexOf(arr, x) {#indexofarr-x}
Returns the index of the first x element (starting from 1) if it is in the array, or 0 if it is not. Returns the index of the first x element (starting from 1) if it is in the array, or 0 if it is not.

View File

@ -17,7 +17,7 @@ SELECT [DISTINCT] expr_list
[FROM [db.]table | (subquery) | table_function] [FINAL] [FROM [db.]table | (subquery) | table_function] [FINAL]
[SAMPLE sample_coeff] [SAMPLE sample_coeff]
[ARRAY JOIN ...] [ARRAY JOIN ...]
[GLOBAL] [ANY|ALL] [INNER|LEFT|RIGHT|FULL|CROSS] [OUTER] JOIN (subquery)|table USING columns_list [GLOBAL] [ANY|ALL|ASOF] [INNER|LEFT|RIGHT|FULL|CROSS] [OUTER|SEMI|ANTI] JOIN (subquery)|table (ON <expr_list>)|(USING <column_list>)
[PREWHERE expr] [PREWHERE expr]
[WHERE expr] [WHERE expr]
[GROUP BY expr_list] [WITH TOTALS] [GROUP BY expr_list] [WITH TOTALS]

View File

@ -12,6 +12,7 @@ toc_title: SYSTEM
- [DROP MARK CACHE](#query_language-system-drop-mark-cache) - [DROP MARK CACHE](#query_language-system-drop-mark-cache)
- [DROP UNCOMPRESSED CACHE](#query_language-system-drop-uncompressed-cache) - [DROP UNCOMPRESSED CACHE](#query_language-system-drop-uncompressed-cache)
- [DROP COMPILED EXPRESSION CACHE](#query_language-system-drop-compiled-expression-cache) - [DROP COMPILED EXPRESSION CACHE](#query_language-system-drop-compiled-expression-cache)
- [DROP REPLICA](#query_language-system-drop-replica)
- [FLUSH LOGS](#query_language-system-flush_logs) - [FLUSH LOGS](#query_language-system-flush_logs)
- [RELOAD CONFIG](#query_language-system-reload-config) - [RELOAD CONFIG](#query_language-system-reload-config)
- [SHUTDOWN](#query_language-system-shutdown) - [SHUTDOWN](#query_language-system-shutdown)
@ -67,6 +68,24 @@ For more convenient (automatic) cache management, see disable\_internal\_dns\_ca
Resets the mark cache. Used in development of ClickHouse and performance tests. Resets the mark cache. Used in development of ClickHouse and performance tests.
## DROP REPLICA {#query_language-system-drop-replica}
Dead replicas can be dropped using following syntax:
```sql
SYSTEM DROP REPLICA 'replica_name' FROM TABLE database.table;
SYSTEM DROP REPLICA 'replica_name' FROM DATABASE database;
SYSTEM DROP REPLICA 'replica_name';
SYSTEM DROP REPLICA 'replica_name' FROM ZKPATH '/path/to/table/in/zk';
```
Queries will remove the replica path in ZooKeeper. It's useful when replica is dead and its metadata cannot be removed from ZooKeeper by `DROP TABLE` because there is no such table anymore. It will only drop the inactive/stale replica, and it can't drop local replica, please use `DROP TABLE` for that. `DROP REPLICA` does not drop any tables and does not remove any data or metadata from disk.
The first one removes metadata of `'replica_name'` replica of `database.table` table.
The second one does the same for all replicated tables in the database.
The third one does the same for all replicated tables on local server.
The forth one is useful to remove metadata of dead replica when all other replicas of a table were dropped. It requires the table path to be specified explicitly. It must be the same path as was passed to the first argument of `ReplicatedMergeTree` engine on table creation.
## DROP UNCOMPRESSED CACHE {#query_language-system-drop-uncompressed-cache} ## DROP UNCOMPRESSED CACHE {#query_language-system-drop-uncompressed-cache}
Reset the uncompressed data cache. Used in development of ClickHouse and performance tests. Reset the uncompressed data cache. Used in development of ClickHouse and performance tests.

View File

@ -1,6 +1,10 @@
--- ---
toc_folder_title: What's New toc_folder_title: What's New
toc_priority: 72 toc_priority: 82
--- ---
# What's New In ClickHouse?
There's a short high-level [roadmap](roadmap.md) and a detailed [changelog](changelog/index.md) for releases that have already been published.

View File

@ -5,12 +5,14 @@ toc_title: Roadmap
# Roadmap {#roadmap} # Roadmap {#roadmap}
## Q2 2020 {#q2-2020}
- Integration with external authentication services
## Q3 2020 {#q3-2020} ## Q3 2020 {#q3-2020}
- High durability mode (`fsync` and WAL)
- Support spilling data to disk in `GLOBAL JOIN`
## Q4 2020 {#q4-2020}
- Improved efficiency of distributed queries
- Resource pools for more precise distribution of cluster capacity between users - Resource pools for more precise distribution of cluster capacity between users
{## [Original article](https://clickhouse.tech/docs/en/roadmap/) ##} {## [Original article](https://clickhouse.tech/docs/en/roadmap/) ##}

View File

@ -1048,17 +1048,6 @@ Valores posibles:
Valor predeterminado: 0 Valor predeterminado: 0
## force\_optimize\_skip\_unused\_shards\_no\_nested {#settings-force_optimize_skip_unused_shards_no_nested}
Restablecer [`optimize_skip_unused_shards`](#settings-force_optimize_skip_unused_shards) para anidados `Distributed` tabla
Valores posibles:
- 1 — Enabled.
- 0 — Disabled.
Valor predeterminado: 0.
## Optize\_throw\_if\_noop {#setting-optimize_throw_if_noop} ## Optize\_throw\_if\_noop {#setting-optimize_throw_if_noop}
Habilita o deshabilita el lanzamiento de una excepción [OPTIMIZE](../../sql-reference/statements/misc.md#misc_operations-optimize) la consulta no realizó una fusión. Habilita o deshabilita el lanzamiento de una excepción [OPTIMIZE](../../sql-reference/statements/misc.md#misc_operations-optimize) la consulta no realizó una fusión.

View File

@ -15,7 +15,7 @@ SELECT [DISTINCT] expr_list
[FROM [db.]table | (subquery) | table_function] [FINAL] [FROM [db.]table | (subquery) | table_function] [FINAL]
[SAMPLE sample_coeff] [SAMPLE sample_coeff]
[ARRAY JOIN ...] [ARRAY JOIN ...]
[GLOBAL] [ANY|ALL] [INNER|LEFT|RIGHT|FULL|CROSS] [OUTER] JOIN (subquery)|table USING columns_list [GLOBAL] [ANY|ALL|ASOF] [INNER|LEFT|RIGHT|FULL|CROSS] [OUTER|SEMI|ANTI] JOIN (subquery)|table (ON <expr_list>)|(USING <column_list>)
[PREWHERE expr] [PREWHERE expr]
[WHERE expr] [WHERE expr]
[GROUP BY expr_list] [WITH TOTALS] [GROUP BY expr_list] [WITH TOTALS]

View File

@ -1048,17 +1048,6 @@ The results of the compilation are saved in the build directory in the form of .
مقدار پیشفرض: 0 مقدار پیشفرض: 0
## به زور \_بهتیتیتیتی\_سکیپ\_اس\_ش\_شارد\_مایش داده میشود {#settings-force_optimize_skip_unused_shards_no_nested}
بازنشانی [`optimize_skip_unused_shards`](#settings-force_optimize_skip_unused_shards) برای تو در تو `Distributed` جدول
مقادیر ممکن:
- 1 — Enabled.
- 0 — Disabled.
مقدار پیش فرض: 0.
## ا\_فزون\_ف\_کوپ {#setting-optimize_throw_if_noop} ## ا\_فزون\_ف\_کوپ {#setting-optimize_throw_if_noop}
را قادر می سازد و یا غیر فعال پرتاب یک استثنا اگر یک [OPTIMIZE](../../sql-reference/statements/misc.md#misc_operations-optimize) پرس و جو یک ادغام انجام نمی. را قادر می سازد و یا غیر فعال پرتاب یک استثنا اگر یک [OPTIMIZE](../../sql-reference/statements/misc.md#misc_operations-optimize) پرس و جو یک ادغام انجام نمی.

View File

@ -15,7 +15,7 @@ SELECT [DISTINCT] expr_list
[FROM [db.]table | (subquery) | table_function] [FINAL] [FROM [db.]table | (subquery) | table_function] [FINAL]
[SAMPLE sample_coeff] [SAMPLE sample_coeff]
[ARRAY JOIN ...] [ARRAY JOIN ...]
[GLOBAL] [ANY|ALL] [INNER|LEFT|RIGHT|FULL|CROSS] [OUTER] JOIN (subquery)|table USING columns_list [GLOBAL] [ANY|ALL|ASOF] [INNER|LEFT|RIGHT|FULL|CROSS] [OUTER|SEMI|ANTI] JOIN (subquery)|table (ON <expr_list>)|(USING <column_list>)
[PREWHERE expr] [PREWHERE expr]
[WHERE expr] [WHERE expr]
[GROUP BY expr_list] [WITH TOTALS] [GROUP BY expr_list] [WITH TOTALS]

View File

@ -1048,17 +1048,6 @@ Valeurs possibles:
Valeur par défaut: 0 Valeur par défaut: 0
## force\_optimize\_skip\_unused\_shards\_no\_nested {#settings-force_optimize_skip_unused_shards_no_nested}
Réinitialiser [`optimize_skip_unused_shards`](#settings-force_optimize_skip_unused_shards) pour imbriquée `Distributed` table
Valeurs possibles:
- 1 — Enabled.
- 0 — Disabled.
Valeur par défaut: 0.
## optimize\_throw\_if\_noop {#setting-optimize_throw_if_noop} ## optimize\_throw\_if\_noop {#setting-optimize_throw_if_noop}
Active ou désactive le lancement d'une exception si [OPTIMIZE](../../sql-reference/statements/misc.md#misc_operations-optimize) la requête n'a pas effectué de fusion. Active ou désactive le lancement d'une exception si [OPTIMIZE](../../sql-reference/statements/misc.md#misc_operations-optimize) la requête n'a pas effectué de fusion.

View File

@ -15,7 +15,7 @@ SELECT [DISTINCT] expr_list
[FROM [db.]table | (subquery) | table_function] [FINAL] [FROM [db.]table | (subquery) | table_function] [FINAL]
[SAMPLE sample_coeff] [SAMPLE sample_coeff]
[ARRAY JOIN ...] [ARRAY JOIN ...]
[GLOBAL] [ANY|ALL] [INNER|LEFT|RIGHT|FULL|CROSS] [OUTER] JOIN (subquery)|table USING columns_list [GLOBAL] [ANY|ALL|ASOF] [INNER|LEFT|RIGHT|FULL|CROSS] [OUTER|SEMI|ANTI] JOIN (subquery)|table (ON <expr_list>)|(USING <column_list>)
[PREWHERE expr] [PREWHERE expr]
[WHERE expr] [WHERE expr]
[GROUP BY expr_list] [WITH TOTALS] [GROUP BY expr_list] [WITH TOTALS]

View File

@ -1048,17 +1048,6 @@ PREWHERE/WHEREにシャーディングキー条件があるSELECTクエリの未
デフォルト値:0 デフォルト値:0
## force\_optimize\_skip\_unused\_shards\_no\_nested {#settings-force_optimize_skip_unused_shards_no_nested}
リセット [`optimize_skip_unused_shards`](#settings-force_optimize_skip_unused_shards) 入れ子の場合 `Distributed` テーブル
可能な値:
- 1 — Enabled.
- 0 — Disabled.
デフォルト値は0です。
## optimize\_throw\_if\_noop {#setting-optimize_throw_if_noop} ## optimize\_throw\_if\_noop {#setting-optimize_throw_if_noop}
例外のスローを有効または無効にします。 [OPTIMIZE](../../sql-reference/statements/misc.md#misc_operations-optimize) クエリがマージを実行しませんでした。 例外のスローを有効または無効にします。 [OPTIMIZE](../../sql-reference/statements/misc.md#misc_operations-optimize) クエリがマージを実行しませんでした。

View File

@ -82,6 +82,18 @@ sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh
Для запуска ClickHouse в Docker нужно следовать инструкции на [Docker Hub](https://hub.docker.com/r/yandex/clickhouse-server/). Внутри образов используются официальные `deb` пакеты. Для запуска ClickHouse в Docker нужно следовать инструкции на [Docker Hub](https://hub.docker.com/r/yandex/clickhouse-server/). Внутри образов используются официальные `deb` пакеты.
### Из исполняемых файлов для нестандартных окружений {#from-binaries-non-linux}
Для других операционных систем и арихитектуры AArch64, сборки ClickHouse предоставляются в виде кросс-компилированного бинарника с последнего коммита ветки master (с задержкой в несколько часов).
- [macOS](https://builds.clickhouse.tech/master/macos/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/macos/clickhouse' && chmod a+x ./clickhouse`
- [AArch64](https://builds.clickhouse.tech/master/aarch64/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/aarch64/clickhouse' && chmod a+x ./clickhouse`
- [FreeBSD](https://builds.clickhouse.tech/master/freebsd/clickhouse) — `curl -O 'https://builds.clickhouse.tech/master/freebsd/clickhouse' && chmod a+x ./clickhouse`
После скачивания, можно воспользоваться `clickhouse client` для подключения к серверу, или `clickhouse local` для обработки локальных данных. Для запуска `clickhouse server` необходимо скачать конфигурационные файлы [сервера](https://github.com/ClickHouse/ClickHouse/blob/master/programs/server/config.xml) и [пользователей](https://github.com/ClickHouse/ClickHouse/blob/master/programs/server/users.xml) с GitHub.
Данные сборки не рекомендуются для использования в продакшене, так как они недостаточно тщательно протестированны. Также, в них присутствуют не все возможности ClickHouse.
### Из исходного кода {#from-sources} ### Из исходного кода {#from-sources}
Для компиляции ClickHouse вручную, используйте инструкцию для [Linux](../development/build.md) или [Mac OS X](../development/build-osx.md). Для компиляции ClickHouse вручную, используйте инструкцию для [Linux](../development/build.md) или [Mac OS X](../development/build-osx.md).

View File

@ -276,7 +276,7 @@ $ curl -sS 'http://localhost:8123/?max_result_bytes=4000000&buffer_size=3000000&
### Пример {#primer} ### Пример {#primer}
``` bash ``` bash
$ curl -sS "<address>?param_id=2¶m_phrase=test" -d "SELECT * FROM table WHERE int_column = {id:UInt8} and string_column = {phrase:String}" $ curl -sS "http://localhost:8123/?param_id=2&param_phrase=test" -d "SELECT * FROM table WHERE int_column = {id:UInt8} and string_column = {phrase:String}"
``` ```
## Предопределенный HTTP интерфейс {#predefined_http_interface} ## Предопределенный HTTP интерфейс {#predefined_http_interface}

View File

@ -1025,7 +1025,7 @@ ClickHouse генерирует исключение
Значение по умолчанию: 0. Значение по умолчанию: 0.
## optimize_skip_unused_shards {#optimize-skip-unused-shards} ## optimize\_skip\_unused\_shards {#optimize-skip-unused-shards}
Включает или отключает пропуск неиспользуемых шардов для запросов [SELECT](../../sql-reference/statements/select/index.md) , в которых условие ключа шардирования задано в секции `WHERE/PREWHERE`. Предполагается, что данные распределены с помощью ключа шардирования, в противном случае настройка ничего не делает. Включает или отключает пропуск неиспользуемых шардов для запросов [SELECT](../../sql-reference/statements/select/index.md) , в которых условие ключа шардирования задано в секции `WHERE/PREWHERE`. Предполагается, что данные распределены с помощью ключа шардирования, в противном случае настройка ничего не делает.
@ -1036,15 +1036,39 @@ ClickHouse генерирует исключение
Значение по умолчанию: 0 Значение по умолчанию: 0
## force_optimize_skip_unused_shards {#force-optimize-skip-unused-shards} ## optimize\_skip\_unused\_shards\_nesting {#optimize-skip-unused-shards-nesting}
Контролирует настройку [`optimize_skip_unused_shards`](#optimize-skip-unused-shards) (поэтому все еще требует `optimize_skip_unused_shards`) в зависимости от вложенности распределенного запроса (когда у вас есть `Distributed` таблица которая смотрит на другую `Distributed` таблицу).
Возможные значения:
- 0 — Выключена, `optimize_skip_unused_shards` работает всегда.
- 1 — Включает `optimize_skip_unused_shards` только для 1-ого уровня вложенности.
- 2 — Включает `optimize_skip_unused_shards` для 1-ого и 2-ого уровня вложенности.
Значение по умолчанию: 0
## force\_optimize\_skip\_unused\_shards {#force-optimize-skip-unused-shards}
Разрешает или запрещает выполнение запроса, если настройка [optimize_skip_unused_shards](#optimize-skip-unused-shards) включена, а пропуск неиспользуемых шардов невозможен. Если данная настройка включена и пропуск невозможен, ClickHouse генерирует исключение. Разрешает или запрещает выполнение запроса, если настройка [optimize_skip_unused_shards](#optimize-skip-unused-shards) включена, а пропуск неиспользуемых шардов невозможен. Если данная настройка включена и пропуск невозможен, ClickHouse генерирует исключение.
Возможные значения: Возможные значения:
- 0 — Выключена. ClickHouse не генерирует исключение. - 0 — Выключена, `force_optimize_skip_unused_shards` работает всегда.
- 1 — Включена. Выполнение запроса запрещается, только если у таблицы есть ключ шардирования. - 1 — Включает `force_optimize_skip_unused_shards` только для 1-ого уровня вложенности.
- 2 — Включена. Выполнение запроса запрещается, даже если для таблицы не определен ключ шардирования. - 2 — Включает `force_optimize_skip_unused_shards` для 1-ого и 2-ого уровня вложенности.
Значение по умолчанию: 0
## force\_optimize\_skip\_unused\_shards\_nesting {#settings-force_optimize_skip_unused_shards_nesting}
Контролирует настройку [`force_optimize_skip_unused_shards`](#force-optimize-skip-unused-shards) (поэтому все еще требует `optimize_skip_unused_shards`) в зависимости от вложенности распределенного запроса (когда у вас есть `Distributed` таблица которая смотрит на другую `Distributed` таблицу).
Возможные значения:
- 0 - Disabled, `force_optimize_skip_unused_shards` works on all levels.
- 1 — Enables `force_optimize_skip_unused_shards` only for the first level.
- 2 — Enables `force_optimize_skip_unused_shards` up to the second level.
Значение по умолчанию: 0 Значение по умолчанию: 0

View File

@ -13,7 +13,7 @@ SELECT [DISTINCT] expr_list
[FROM [db.]table | (subquery) | table_function] [FINAL] [FROM [db.]table | (subquery) | table_function] [FINAL]
[SAMPLE sample_coeff] [SAMPLE sample_coeff]
[ARRAY JOIN ...] [ARRAY JOIN ...]
[GLOBAL] [ANY|ALL] [INNER|LEFT|RIGHT|FULL|CROSS] [OUTER] JOIN (subquery)|table USING columns_list [GLOBAL] [ANY|ALL|ASOF] [INNER|LEFT|RIGHT|FULL|CROSS] [OUTER|SEMI|ANTI] JOIN (subquery)|table (ON <expr_list>)|(USING <column_list>)
[PREWHERE expr] [PREWHERE expr]
[WHERE expr] [WHERE expr]
[GROUP BY expr_list] [WITH TOTALS] [GROUP BY expr_list] [WITH TOTALS]

View File

@ -20,7 +20,18 @@ Usually those also have some way to preview how Markdown will look like, which a
Itll take some effort to go through, but the result will be very close to production documentation. Itll take some effort to go through, but the result will be very close to production documentation.
For the first time youll need to install [wkhtmltopdf](https://wkhtmltopdf.org/) and set up virtualenv: For the first time youll need to:
#### 1. Install [wkhtmltopdf](https://wkhtmltopdf.org/)
Follow the instructions on it's official website: <https://wkhtmltopdf.org/downloads.html>
#### 2. Install CLI tools from npm
1. `apt-get install npm` for Debian/Ubuntu or `brew install npm` on Mac OS X.
2. `npm install -g purifycss amphtml-validator`.
#### 3. Set up virtualenv
``` bash ``` bash
$ cd ClickHouse/docs/tools $ cd ClickHouse/docs/tools
@ -30,7 +41,9 @@ $ source venv/bin/activate
$ pip3 install -r requirements.txt $ pip3 install -r requirements.txt
``` ```
Then running `build.py` without args (there are some, check `build.py --help`) will generate `ClickHouse/docs/build` folder with complete static html website. #### 4. Run build.py
When all prerequisites are installed, running `build.py` without args (there are some, check `build.py --help`) will generate `ClickHouse/docs/build` folder with complete static html website.
The easiest way to see the result is to use `--livereload=8888` argument of build.py. Alternatively, you can manually launch a HTTP server to serve the docs, for example by running `cd ClickHouse/docs/build && python3 -m http.server 8888`. Then go to http://localhost:8888 in browser. Feel free to use any other port instead of 8888. The easiest way to see the result is to use `--livereload=8888` argument of build.py. Alternatively, you can manually launch a HTTP server to serve the docs, for example by running `cd ClickHouse/docs/build && python3 -m http.server 8888`. Then go to http://localhost:8888 in browser. Feel free to use any other port instead of 8888.

View File

@ -26,6 +26,7 @@ MARKDOWN_EXTENSIONS = [
'mdx_clickhouse', 'mdx_clickhouse',
'admonition', 'admonition',
'attr_list', 'attr_list',
'def_list',
'codehilite', 'codehilite',
'nl2br', 'nl2br',
'sane_lists', 'sane_lists',

View File

@ -117,16 +117,19 @@ def translate_filter(key, value, _format, _):
admonition_value = [] admonition_value = []
remaining_para_value = [] remaining_para_value = []
in_admonition = True in_admonition = True
break_value = [pandocfilters.LineBreak(), pandocfilters.Str(' ' * 4)]
for item in value: for item in value:
if in_admonition: if in_admonition:
if item.get('t') == 'SoftBreak': if item.get('t') == 'SoftBreak':
in_admonition = False in_admonition = False
else: else:
admonition_value.append(item) admonition_value.append(item)
else:
if item.get('t') == 'SoftBreak':
remaining_para_value += break_value
else: else:
remaining_para_value.append(item) remaining_para_value.append(item)
break_value = [pandocfilters.LineBreak(), pandocfilters.Str(' ' * 4)]
if admonition_value[-1].get('t') == 'Quoted': if admonition_value[-1].get('t') == 'Quoted':
text = process_sentence(admonition_value[-1]['c'][-1]) text = process_sentence(admonition_value[-1]['c'][-1])
text[0]['c'] = '"' + text[0]['c'] text[0]['c'] = '"' + text[0]['c']
@ -136,7 +139,7 @@ def translate_filter(key, value, _format, _):
else: else:
text = admonition_value[-1].get('c') text = admonition_value[-1].get('c')
if text: if text:
text = translate(text[0].upper() + text[1:]) text = translate.translate(text[0].upper() + text[1:])
admonition_value.append(pandocfilters.Space()) admonition_value.append(pandocfilters.Space())
admonition_value.append(pandocfilters.Str(f'"{text}"')) admonition_value.append(pandocfilters.Str(f'"{text}"'))

View File

@ -16,7 +16,7 @@ source "${BASE_DIR}/venv/bin/activate"
${BASE_DIR}/split_meta.py "${INPUT_PATH}" ${BASE_DIR}/split_meta.py "${INPUT_PATH}"
pandoc "${INPUT_CONTENT}" --filter "${BASE_DIR}/filter.py" -o "${TEMP_FILE}" \ pandoc "${INPUT_CONTENT}" --filter "${BASE_DIR}/filter.py" -o "${TEMP_FILE}" \
-f "markdown-space_in_atx_header" -t "markdown_strict+pipe_tables+markdown_attribute+all_symbols_escapable+backtick_code_blocks+autolink_bare_uris-link_attributes+markdown_attribute+mmd_link_attributes-raw_attribute+header_attributes-grid_tables" \ -f "markdown-space_in_atx_header" -t "markdown_strict+pipe_tables+markdown_attribute+all_symbols_escapable+backtick_code_blocks+autolink_bare_uris-link_attributes+markdown_attribute+mmd_link_attributes-raw_attribute+header_attributes-grid_tables+definition_lists" \
--atx-headers --wrap=none --columns=99999 --tab-stop=4 --atx-headers --wrap=none --columns=99999 --tab-stop=4
perl -pi -e 's/{\\#\\#/{##/g' "${TEMP_FILE}" perl -pi -e 's/{\\#\\#/{##/g' "${TEMP_FILE}"
perl -pi -e 's/\\#\\#}/##}/g' "${TEMP_FILE}" perl -pi -e 's/\\#\\#}/##}/g' "${TEMP_FILE}"

View File

@ -67,6 +67,13 @@ def adjust_markdown_html(content):
summary.extract() summary.extract()
details.insert(0, summary) details.insert(0, summary)
for dd in soup.find_all('dd'):
dd_class = dd.attrs.get('class')
if dd_class:
dd.attrs['class'] = dd_class + ['pl-3']
else:
dd.attrs['class'] = 'pl-3'
for div in soup.find_all('div'): for div in soup.find_all('div'):
div_class = div.attrs.get('class') div_class = div.attrs.get('class')
is_admonition = div_class and 'admonition' in div.attrs.get('class') is_admonition = div_class and 'admonition' in div.attrs.get('class')

View File

@ -1048,17 +1048,6 @@ Olası değerler:
Varsayılan değer: 0 Varsayılan değer: 0
## force\_optimize\_skip\_unused\_shards\_no\_nested {#settings-force_optimize_skip_unused_shards_no_nested}
Sıfırlamak [`optimize_skip_unused_shards`](#settings-force_optimize_skip_unused_shards) iç içe geçmiş için `Distributed` Tablo
Olası değerler:
- 1 — Enabled.
- 0 — Disabled.
Varsayılan değer: 0.
## optimize\_throw\_if\_noop {#setting-optimize_throw_if_noop} ## optimize\_throw\_if\_noop {#setting-optimize_throw_if_noop}
Bir özel durum atmayı etkinleştirir veya devre dışı bırakır. [OPTIMIZE](../../sql-reference/statements/misc.md#misc_operations-optimize) sorgu birleştirme gerçekleştirmedi. Bir özel durum atmayı etkinleştirir veya devre dışı bırakır. [OPTIMIZE](../../sql-reference/statements/misc.md#misc_operations-optimize) sorgu birleştirme gerçekleştirmedi.

View File

@ -1,9 +1,9 @@
--- ---
toc_priority: 37 toc_priority: 37
toc_title: "\u7248\u672C\u96C6\u5408\u5728\u65B0\u6811" toc_title: "版本折叠MergeTree"
--- ---
# 版本折叠合并树 {#versionedcollapsingmergetree} # 版本折叠MergeTree {#versionedcollapsingmergetree}
这个引擎: 这个引擎:

View File

@ -1048,17 +1048,6 @@ ClickHouse生成异常
默认值0 默认值0
## force\_optimize\_skip\_unused\_shards\_no\_nested {#settings-force_optimize_skip_unused_shards_no_nested}
重置 [`optimize_skip_unused_shards`](#settings-force_optimize_skip_unused_shards) 对于嵌套 `Distributed`
可能的值:
- 1 — Enabled.
- 0 — Disabled.
默认值0。
## optimize\_throw\_if\_noop {#setting-optimize_throw_if_noop} ## optimize\_throw\_if\_noop {#setting-optimize_throw_if_noop}
启用或禁用抛出异常,如果 [OPTIMIZE](../../sql-reference/statements/misc.md#misc_operations-optimize) 查询未执行合并。 启用或禁用抛出异常,如果 [OPTIMIZE](../../sql-reference/statements/misc.md#misc_operations-optimize) 查询未执行合并。

View File

@ -19,7 +19,7 @@ SELECT [DISTINCT] expr_list
[FROM [db.]table | (subquery) | table_function] [FINAL] [FROM [db.]table | (subquery) | table_function] [FINAL]
[SAMPLE sample_coeff] [SAMPLE sample_coeff]
[ARRAY JOIN ...] [ARRAY JOIN ...]
[GLOBAL] [ANY|ALL] [INNER|LEFT|RIGHT|FULL|CROSS] [OUTER] JOIN (subquery)|table USING columns_list [GLOBAL] [ANY|ALL|ASOF] [INNER|LEFT|RIGHT|FULL|CROSS] [OUTER|SEMI|ANTI] JOIN (subquery)|table (ON <expr_list>)|(USING <column_list>)
[PREWHERE expr] [PREWHERE expr]
[WHERE expr] [WHERE expr]
[GROUP BY expr_list] [WITH TOTALS] [GROUP BY expr_list] [WITH TOTALS]

View File

@ -1,16 +1,18 @@
--- ---
machine_translated: true
machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd
toc_priority: 37 toc_priority: 37
toc_title: SYSTEM toc_title: SYSTEM
--- ---
# 系统查询 {#query-language-system} # SYSTEM Queries {#query-language-system}
- [RELOAD EMBEDDED DICTIONARIES](#query_language-system-reload-emdedded-dictionaries)
- [RELOAD DICTIONARIES](#query_language-system-reload-dictionaries) - [RELOAD DICTIONARIES](#query_language-system-reload-dictionaries)
- [RELOAD DICTIONARY](#query_language-system-reload-dictionary) - [RELOAD DICTIONARY](#query_language-system-reload-dictionary)
- [DROP DNS CACHE](#query_language-system-drop-dns-cache) - [DROP DNS CACHE](#query_language-system-drop-dns-cache)
- [DROP MARK CACHE](#query_language-system-drop-mark-cache) - [DROP MARK CACHE](#query_language-system-drop-mark-cache)
- [DROP UNCOMPRESSED CACHE](#query_language-system-drop-uncompressed-cache)
- [DROP COMPILED EXPRESSION CACHE](#query_language-system-drop-compiled-expression-cache)
- [DROP REPLICA](#query_language-system-drop-replica)
- [FLUSH LOGS](#query_language-system-flush_logs) - [FLUSH LOGS](#query_language-system-flush_logs)
- [RELOAD CONFIG](#query_language-system-reload-config) - [RELOAD CONFIG](#query_language-system-reload-config)
- [SHUTDOWN](#query_language-system-shutdown) - [SHUTDOWN](#query_language-system-shutdown)
@ -20,18 +22,37 @@ toc_title: SYSTEM
- [START DISTRIBUTED SENDS](#query_language-system-start-distributed-sends) - [START DISTRIBUTED SENDS](#query_language-system-start-distributed-sends)
- [STOP MERGES](#query_language-system-stop-merges) - [STOP MERGES](#query_language-system-stop-merges)
- [START MERGES](#query_language-system-start-merges) - [START MERGES](#query_language-system-start-merges)
- [STOP TTL MERGES](#query_language-stop-ttl-merges)
- [START TTL MERGES](#query_language-start-ttl-merges)
- [STOP MOVES](#query_language-stop-moves)
- [START MOVES](#query_language-start-moves)
- [STOP FETCHES](#query_language-system-stop-fetches)
- [START FETCHES](#query_language-system-start-fetches)
- [STOP REPLICATED SENDS](#query_language-system-start-replicated-sends)
- [START REPLICATED SENDS](#query_language-system-start-replicated-sends)
- [STOP REPLICATION QUEUES](#query_language-system-stop-replication-queues)
- [START REPLICATION QUEUES](#query_language-system-start-replication-queues)
- [SYNC REPLICA](#query_language-system-sync-replica)
- [RESTART REPLICA](#query_language-system-restart-replica)
- [RESTART REPLICAS](#query_language-system-restart-replicas)
## RELOAD EMBEDDED DICTIONARIES\] {#query_language-system-reload-emdedded-dictionaries}
重新加载所有[内置字典](../../sql-reference/dictionaries/internal-dicts.md)。默认情况下内置字典是禁用的。
总是返回 OK.’,不管这些内置字典的更新结果如何。
## RELOAD DICTIONARIES {#query_language-system-reload-dictionaries} ## RELOAD DICTIONARIES {#query_language-system-reload-dictionaries}
重新加载之前已成功加载的所有字典。 重载已经被成功加载过的所有字典。
默认情况下,字典是懒惰加载的(请参阅 [dictionaries\_lazy\_load](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-dictionaries_lazy_load)所以不是在启动时自动加载而是通过dictGet函数在第一次访问时初始化或者从ENGINE=Dictionary的表中选择。 该 `SYSTEM RELOAD DICTIONARIES` 查询重新加载这样的字典(加载)。 默认情况下,字典是延时加载的( [dictionaries\_lazy\_load](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-dictionaries_lazy_load)不是在服务启动时自动加载而是在第一次使用dictGet函数或通过 `SELECT from tables with ENGINE = Dictionary` 进行访问时被初始化。这个命令 `SYSTEM RELOAD DICTIONARIES` 就是针对这类表进行重新加载的。
总是返回 `Ok.` 无论字典更新的结果如何。
## 重新加载字典Dictionary\_name {#query_language-system-reload-dictionary}
完全重新加载字典 `dictionary_name`与字典的状态无关LOADED/NOT\_LOADED/FAILED ## RELOAD DICTIONARY Dictionary\_name {#query_language-system-reload-dictionary}
总是返回 `Ok.` 无论更新字典的结果如何。
字典的状态可以通过查询 `system.dictionaries` 桌子 完全重新加载指定字典 `dictionary_name`,不管该字典的状态如何(LOADED / NOT\_LOADED / FAILED)。不管字典的更新结果如何,总是返回 `OK.`
字典的状态可以通过查询 `system.dictionaries`表来检查。
``` sql ``` sql
SELECT name, status FROM system.dictionaries; SELECT name, status FROM system.dictionaries;
@ -39,37 +60,67 @@ SELECT name, status FROM system.dictionaries;
## DROP DNS CACHE {#query_language-system-drop-dns-cache} ## DROP DNS CACHE {#query_language-system-drop-dns-cache}
重置ClickHouse的内部DNS缓存。 有时对于旧的ClickHouse版本在更改基础架构更改另一个ClickHouse服务器或字典使用的服务器的IP地址时需要使用此命令。 重置CH的dns缓存。有时候对于旧的ClickHouse版本当某些底层环境发生变化时修改其它Clickhouse服务器的ip或字典所在服务器的ip需要使用该命令。
更多自动化的缓存管理相关信息参见disable\_internal\_dns\_cache, dns\_cache\_update\_period这些参数。
有关更方便自动缓存管理请参阅disable\_internal\_dns\_cache、dns\_cache\_update\_period参数。
## DROP MARK CACHE {#query_language-system-drop-mark-cache} ## DROP MARK CACHE {#query_language-system-drop-mark-cache}
重置标记缓存。 用于开发ClickHouse和性能测试。 重置mark缓存。在进行ClickHouse开发或性能测试时使用。
## DROP REPLICA {#query_language-system-drop-replica}
使用下面的语句可以删除已经无效的副本。
```sql
SYSTEM DROP REPLICA 'replica_name' FROM TABLE database.table;
SYSTEM DROP REPLICA 'replica_name' FROM DATABASE database;
SYSTEM DROP REPLICA 'replica_name';
SYSTEM DROP REPLICA 'replica_name' FROM ZKPATH '/path/to/table/in/zk';
```
该操作将副本的路径从Zookeeper中删除。当副本失效并且由于该副本已经不存在导致它的元数据不能通过 `DROP TABLE`从zookeeper中删除这种情形下可以使用该命令。它只会删除失效或过期的副本不会删除本地的副本。请使用 `DROP TABLE` 来删除本地副本。 `DROP REPLICA` 不会删除任何表,并且不会删除磁盘上的任何数据或元数据信息。
第1条语句删除 `database.table`表的 `replica_name`副本的元数据
第2条语句删除 `database` 数据库的 所有`replica_name`副本的元数据
第3条语句删除本地服务器所有 `replica_name`副本的元数据
第4条语句用于在表的其它所有副本都删除时删除已失效副本的元数据。使用时需要明确指定表的路径。该路径必须和创建表时 `ReplicatedMergeTree`引擎的第一个参数一致。
## DROP UNCOMPRESSED CACHE {#query_language-system-drop-uncompressed-cache}
重置未压缩数据的缓存。用于ClickHouse开发和性能测试。
管理未压缩数据缓存的参数,使用以下的服务器级别设置 [uncompressed\_cache\_size](../../operations/server-configuration-parameters/settings.md#server-settings-uncompressed_cache_size)以及 `query/user/profile`级别设置 [use\_uncompressed\_cache](../../operations/settings/settings.md#setting-use_uncompressed_cache)
## DROP COMPILED EXPRESSION CACHE {#query_language-system-drop-compiled-expression-cache}
重置已编译的表达式缓存。用于ClickHouse开发和性能测试。
`query/user/profile` 启用配置项 [compile](../../operations/settings/settings.md#compile)时,编译的表达式缓存开启。
## FLUSH LOGS {#query_language-system-flush_logs} ## FLUSH LOGS {#query_language-system-flush_logs}
Flushes buffers of log messages to system tables (e.g. system.query\_log). Allows you to not wait 7.5 seconds when debugging. 将日志信息缓冲数据刷入系统表例如system.query\_log。调试时允许等待不超过7.5秒。当信息队列为空时,会创建系统表。
## RELOAD CONFIG {#query_language-system-reload-config} ## RELOAD CONFIG {#query_language-system-reload-config}
重新加载ClickHouse配置。 当配置存储在ZooKeeeper中时使用。 重新加载ClickHouse的配置。用于当配置信息存放在ZooKeeper时
## SHUTDOWN {#query_language-system-shutdown} ## SHUTDOWN {#query_language-system-shutdown}
通常关闭ClickHouse`service clickhouse-server stop` / `kill {$pid_clickhouse-server}`) 关闭ClickHouse服务类似于 `service clickhouse-server stop` / `kill {$pid_clickhouse-server}`
## KILL {#query_language-system-kill} ## KILL {#query_language-system-kill}
中止ClickHouse进程`kill -9 {$ pid_clickhouse-server}`) 关闭ClickHouse进程 `kill -9 {$ pid_clickhouse-server}`
## 管理分布式表 {#query-language-system-distributed} ## Managing Distributed Tables {#query-language-system-distributed}
ClickHouse可以管理 [distribute](../../engines/table-engines/special/distributed.md)表。当用户向这类表插入数据时ClickHouse首先为需要发送到集群节点的数据创建一个队列然后异步的发送它们。你可以维护队列的处理过程通过[STOP DISTRIBUTED SENDS](#query_language-system-stop-distributed-sends), [FLUSH DISTRIBUTED](#query_language-system-flush-distributed), 以及 [START DISTRIBUTED SENDS](#query_language-system-start-distributed-sends)。你也可以设置 `insert_distributed_sync`参数来以同步的方式插入分布式数据。
ClickHouse可以管理 [分布](../../engines/table-engines/special/distributed.md) 桌子 当用户将数据插入到这些表中时ClickHouse首先创建应发送到群集节点的数据队列然后异步发送它。 您可以使用 [STOP DISTRIBUTED SENDS](#query_language-system-stop-distributed-sends), [FLUSH DISTRIBUTED](#query_language-system-flush-distributed),和 [START DISTRIBUTED SENDS](#query_language-system-start-distributed-sends) 查询。 您也可以同步插入分布式数据与 `insert_distributed_sync` 设置。
### STOP DISTRIBUTED SENDS {#query_language-system-stop-distributed-sends} ### STOP DISTRIBUTED SENDS {#query_language-system-stop-distributed-sends}
将数据插入分布式表时禁用后台数据分发。 当向分布式表插入数据时,禁用后台的分布式数据分发。
``` sql ``` sql
SYSTEM STOP DISTRIBUTED SENDS [db.]<distributed_table_name> SYSTEM STOP DISTRIBUTED SENDS [db.]<distributed_table_name>
@ -77,7 +128,7 @@ SYSTEM STOP DISTRIBUTED SENDS [db.]<distributed_table_name>
### FLUSH DISTRIBUTED {#query_language-system-flush-distributed} ### FLUSH DISTRIBUTED {#query_language-system-flush-distributed}
强制ClickHouse将数据同步发送到群集节点。 如果任何节点不可用ClickHouse将引发异常并停止查询执行。 您可以重试查询,直到查询成功,这将在所有节点恢复联机时发生 强制让ClickHouse同步向集群节点同步发送数据。如果有节点失效ClickHouse抛出异常并停止插入操作。当所有节点都恢复上线时你可以重试之前的操作直到成功执行
``` sql ``` sql
SYSTEM FLUSH DISTRIBUTED [db.]<distributed_table_name> SYSTEM FLUSH DISTRIBUTED [db.]<distributed_table_name>
@ -85,29 +136,152 @@ SYSTEM FLUSH DISTRIBUTED [db.]<distributed_table_name>
### START DISTRIBUTED SENDS {#query_language-system-start-distributed-sends} ### START DISTRIBUTED SENDS {#query_language-system-start-distributed-sends}
将数据插入分布式表时启用后台数据分发。 当向分布式表插入数据时,允许后台的分布式数据分发。
``` sql ``` sql
SYSTEM START DISTRIBUTED SENDS [db.]<distributed_table_name> SYSTEM START DISTRIBUTED SENDS [db.]<distributed_table_name>
``` ```
## Managing MergeTree Tables {#query-language-system-mergetree}
ClickHouse可以管理 [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md)表的后台处理进程。
### STOP MERGES {#query_language-system-stop-merges} ### STOP MERGES {#query_language-system-stop-merges}
提供停止MergeTree系列中表的后台合并的可能性: 为MergeTree系列引擎表停止后台合并操作。
``` sql ``` sql
SYSTEM STOP MERGES [[db.]merge_tree_family_table_name] SYSTEM STOP MERGES [[db.]merge_tree_family_table_name]
``` ```
!!! note "注"
`DETACH / ATTACH` 即使在之前所有MergeTree表的合并已停止的情况下table也会为表启动后台合并。 !!! note "Note"
`DETACH / ATTACH` 表操作会在后台进行表的merge操作甚至当所有MergeTree表的合并操作已经停止的情况下。
### START MERGES {#query_language-system-start-merges} ### START MERGES {#query_language-system-start-merges}
为MergeTree系列中的表提供启动后台合并的可能性: 为MergeTree系列引擎表启动后台合并操作。
``` sql ``` sql
SYSTEM START MERGES [[db.]merge_tree_family_table_name] SYSTEM START MERGES [[db.]merge_tree_family_table_name]
``` ```
[原始文章](https://clickhouse.tech/docs/en/query_language/system/) <!--hide--> ### STOP TTL MERGES {#query_language-stop-ttl-merges}
根据 [TTL expression](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl)为MergeTree系列引擎表停止后台删除旧数据。
不管表存在与否,都返回 `OK.`。当数据库不存在时返回错误。
``` sql
SYSTEM STOP TTL MERGES [[db.]merge_tree_family_table_name]
```
### START TTL MERGES {#query_language-start-ttl-merges}
根据 [TTL expression](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl)为MergeTree系列引擎表启动后台删除旧数据。不管表存在与否都返回 `OK.`。当数据库不存在时返回错误。
``` sql
SYSTEM START TTL MERGES [[db.]merge_tree_family_table_name]
```
### STOP MOVES {#query_language-stop-moves}
根据 [TTL expression](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl)为MergeTree系列引擎表停止后台移动数据。不管表存在与否都返回 `OK.`。当数据库不存在时返回错误。
``` sql
SYSTEM STOP MOVES [[db.]merge_tree_family_table_name]
```
### START MOVES {#query_language-start-moves}
根据 [TTL expression](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl)为MergeTree系列引擎表启动后台移动数据。不管表存在与否都返回 `OK.`。当数据库不存在时返回错误。
``` sql
SYSTEM STOP MOVES [[db.]merge_tree_family_table_name]
```
## Managing ReplicatedMergeTree Tables {#query-language-system-replicated}
管理 [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replacingmergetree.md)表的后台复制相关进程。
### STOP FETCHES {#query_language-system-stop-fetches}
停止后台获取 `ReplicatedMergeTree`系列引擎表中插入的数据块。
不管表引擎类型如何或表/数据库是否存,都返回 `OK.`
``` sql
SYSTEM STOP FETCHES [[db.]replicated_merge_tree_family_table_name]
```
### START FETCHES {#query_language-system-start-fetches}
启动后台获取 `ReplicatedMergeTree`系列引擎表中插入的数据块。
不管表引擎类型如何或表/数据库是否存,都返回 `OK.`
``` sql
SYSTEM START FETCHES [[db.]replicated_merge_tree_family_table_name]
```
### STOP REPLICATED SENDS {#query_language-system-start-replicated-sends}
停止通过后台分发 `ReplicatedMergeTree`系列引擎表中新插入的数据块到集群的其它副本节点。
``` sql
SYSTEM STOP REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name]
```
### START REPLICATED SENDS {#query_language-system-start-replicated-sends}
启动通过后台分发 `ReplicatedMergeTree`系列引擎表中新插入的数据块到集群的其它副本节点。
``` sql
SYSTEM START REPLICATED SENDS [[db.]replicated_merge_tree_family_table_name]
```
### STOP REPLICATION QUEUES {#query_language-system-stop-replication-queues}
停止从Zookeeper中获取 `ReplicatedMergeTree`系列表的复制队列的后台任务。可能的后台任务类型包含merges, fetches, mutation带有 `ON CLUSTER`的ddl语句
``` sql
SYSTEM STOP REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name]
```
### START REPLICATION QUEUES {#query_language-system-start-replication-queues}
启动从Zookeeper中获取 `ReplicatedMergeTree`系列表的复制队列的后台任务。可能的后台任务类型包含merges, fetches, mutation带有 `ON CLUSTER`的ddl语句
``` sql
SYSTEM START REPLICATION QUEUES [[db.]replicated_merge_tree_family_table_name]
```
### SYNC REPLICA {#query_language-system-sync-replica}
直到 `ReplicatedMergeTree`表将要和集群的其它副本进行同步之前会一直运行。如果当前对表的获取操作禁用的话,在达到 `receive_timeout`之前会一直运行。
``` sql
SYSTEM SYNC REPLICA [db.]replicated_merge_tree_family_table_name
```
### RESTART REPLICA {#query_language-system-restart-replica}
重置 `ReplicatedMergeTree`表的Zookeeper会话状态。该操作会以Zookeeper为参照对比当前状态有需要的情况下将任务添加到ZooKeeper队列。
基于ZooKeeper的日期初始化复制队列类似于 `ATTACH TABLE`语句。短时间内不能对表进行任何操作。
``` sql
SYSTEM RESTART REPLICA [db.]replicated_merge_tree_family_table_name
```
### RESTART REPLICAS {#query_language-system-restart-replicas}
重置所有 `ReplicatedMergeTree`表的ZooKeeper会话状态。该操作会以Zookeeper为参照对比当前状态有需要的情况下将任务添加到ZooKeeper队列。
``` sql
SYSTEM RESTART QUEUES [db.]replicated_merge_tree_family_table_name
```
[原始文档](https://clickhouse.tech/docs/en/query_language/system/) <!--hide-->

View File

@ -60,11 +60,15 @@ public:
bool cumulative_, bool secure_, const String & default_database_, bool cumulative_, bool secure_, const String & default_database_,
const String & user_, const String & password_, const String & stage, const String & user_, const String & password_, const String & stage,
bool randomize_, size_t max_iterations_, double max_time_, bool randomize_, size_t max_iterations_, double max_time_,
const String & json_path_, size_t confidence_, const String & query_id_, const Settings & settings_) const String & json_path_, size_t confidence_,
const String & query_id_, bool continue_on_errors_,
bool print_stacktrace_, const Settings & settings_)
: :
concurrency(concurrency_), delay(delay_), queue(concurrency), randomize(randomize_), concurrency(concurrency_), delay(delay_), queue(concurrency), randomize(randomize_),
cumulative(cumulative_), max_iterations(max_iterations_), max_time(max_time_), cumulative(cumulative_), max_iterations(max_iterations_), max_time(max_time_),
json_path(json_path_), confidence(confidence_), query_id(query_id_), settings(settings_), json_path(json_path_), confidence(confidence_), query_id(query_id_),
continue_on_errors(continue_on_errors_),
print_stacktrace(print_stacktrace_), settings(settings_),
shared_context(Context::createShared()), global_context(Context::createGlobal(shared_context.get())), shared_context(Context::createShared()), global_context(Context::createGlobal(shared_context.get())),
pool(concurrency) pool(concurrency)
{ {
@ -150,6 +154,8 @@ private:
String json_path; String json_path;
size_t confidence; size_t confidence;
std::string query_id; std::string query_id;
bool continue_on_errors;
bool print_stacktrace;
Settings settings; Settings settings;
SharedContextHolder shared_context; SharedContextHolder shared_context;
Context global_context; Context global_context;
@ -163,6 +169,7 @@ private:
struct Stats struct Stats
{ {
std::atomic<size_t> queries{0}; std::atomic<size_t> queries{0};
size_t errors = 0;
size_t read_rows = 0; size_t read_rows = 0;
size_t read_bytes = 0; size_t read_bytes = 0;
size_t result_rows = 0; size_t result_rows = 0;
@ -259,7 +266,7 @@ private:
if (interrupt_listener.check()) if (interrupt_listener.check())
{ {
std::cout << "Stopping launch of queries. SIGINT received.\n"; std::cout << "Stopping launch of queries. SIGINT received." << std::endl;
return false; return false;
} }
@ -333,14 +340,14 @@ private:
pcg64 generator(randomSeed()); pcg64 generator(randomSeed());
std::uniform_int_distribution<size_t> distribution(0, connection_entries.size() - 1); std::uniform_int_distribution<size_t> distribution(0, connection_entries.size() - 1);
try
{
/// In these threads we do not accept INT signal. /// In these threads we do not accept INT signal.
sigset_t sig_set; sigset_t sig_set;
if (sigemptyset(&sig_set) if (sigemptyset(&sig_set)
|| sigaddset(&sig_set, SIGINT) || sigaddset(&sig_set, SIGINT)
|| pthread_sigmask(SIG_BLOCK, &sig_set, nullptr)) || pthread_sigmask(SIG_BLOCK, &sig_set, nullptr))
{
throwFromErrno("Cannot block signal.", ErrorCodes::CANNOT_BLOCK_SIGNAL); throwFromErrno("Cannot block signal.", ErrorCodes::CANNOT_BLOCK_SIGNAL);
}
while (true) while (true)
{ {
@ -350,19 +357,40 @@ private:
{ {
extracted = queue.tryPop(query, 100); extracted = queue.tryPop(query, 100);
if (shutdown || (max_iterations && queries_executed == max_iterations)) if (shutdown
|| (max_iterations && queries_executed == max_iterations))
{
return; return;
} }
execute(connection_entries, query, distribution(generator));
++queries_executed;
} }
const auto connection_index = distribution(generator);
try
{
execute(connection_entries, query, connection_index);
} }
catch (...) catch (...)
{
std::cerr << "An error occurred while processing the query '"
<< query << "'.\n";
if (!continue_on_errors)
{ {
shutdown = true; shutdown = true;
std::cerr << "An error occurred while processing query:\n" << query << "\n";
throw; throw;
} }
else
{
std::cerr << getCurrentExceptionMessage(print_stacktrace,
true /*check embedded stack trace*/) << std::endl;
comparison_info_per_interval[connection_index]->errors++;
comparison_info_total[connection_index]->errors++;
}
}
// Count failed queries toward executed, so that we'd reach
// max_iterations even if every run fails.
++queries_executed;
}
} }
void execute(EntryPtrs & connection_entries, Query & query, size_t connection_index) void execute(EntryPtrs & connection_entries, Query & query, size_t connection_index)
@ -410,7 +438,12 @@ private:
std::cerr std::cerr
<< connections[i]->getDescription() << ", " << connections[i]->getDescription() << ", "
<< "queries " << info->queries << ", " << "queries " << info->queries << ", ";
if (info->errors)
{
std::cerr << "errors " << info->errors << ", ";
}
std::cerr
<< "QPS: " << (info->queries / seconds) << ", " << "QPS: " << (info->queries / seconds) << ", "
<< "RPS: " << (info->read_rows / seconds) << ", " << "RPS: " << (info->read_rows / seconds) << ", "
<< "MiB/s: " << (info->read_bytes / seconds / 1048576) << ", " << "MiB/s: " << (info->read_bytes / seconds / 1048576) << ", "
@ -477,11 +510,14 @@ private:
print_key_value("MiBPS", info->read_bytes / info->work_time); print_key_value("MiBPS", info->read_bytes / info->work_time);
print_key_value("RPS_result", info->result_rows / info->work_time); print_key_value("RPS_result", info->result_rows / info->work_time);
print_key_value("MiBPS_result", info->result_bytes / info->work_time); print_key_value("MiBPS_result", info->result_bytes / info->work_time);
print_key_value("num_queries", info->queries.load(), false); print_key_value("num_queries", info->queries.load());
print_key_value("num_errors", info->errors, false);
json_out << "},\n"; json_out << "},\n";
json_out << double_quote << "query_time_percentiles" << ": {\n"; json_out << double_quote << "query_time_percentiles" << ": {\n";
if (info->queries != 0)
{
for (int percent = 0; percent <= 90; percent += 10) for (int percent = 0; percent <= 90; percent += 10)
print_percentile(*info, percent); print_percentile(*info, percent);
@ -489,6 +525,7 @@ private:
print_percentile(*info, 99); print_percentile(*info, 99);
print_percentile(*info, 99.9); print_percentile(*info, 99.9);
print_percentile(*info, 99.99, false); print_percentile(*info, 99.99, false);
}
json_out << "}\n"; json_out << "}\n";
json_out << (i == infos.size() - 1 ? "}\n" : "},\n"); json_out << (i == infos.size() - 1 ? "}\n" : "},\n");
@ -542,6 +579,7 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv)
("stacktrace", "print stack traces of exceptions") ("stacktrace", "print stack traces of exceptions")
("confidence", value<size_t>()->default_value(5), "set the level of confidence for T-test [0=80%, 1=90%, 2=95%, 3=98%, 4=99%, 5=99.5%(default)") ("confidence", value<size_t>()->default_value(5), "set the level of confidence for T-test [0=80%, 1=90%, 2=95%, 3=98%, 4=99%, 5=99.5%(default)")
("query_id", value<std::string>()->default_value(""), "") ("query_id", value<std::string>()->default_value(""), "")
("continue_on_errors", "continue testing even if a query fails")
; ;
Settings settings; Settings settings;
@ -583,6 +621,8 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv)
options["json"].as<std::string>(), options["json"].as<std::string>(),
options["confidence"].as<size_t>(), options["confidence"].as<size_t>(),
options["query_id"].as<std::string>(), options["query_id"].as<std::string>(),
options.count("continue_on_errors") > 0,
print_stacktrace,
settings); settings);
return benchmark.run(); return benchmark.run();
} }

View File

@ -132,7 +132,12 @@ private:
std::unique_ptr<Connection> connection; /// Connection to DB. std::unique_ptr<Connection> connection; /// Connection to DB.
String query_id; /// Current query_id. String query_id; /// Current query_id.
String query; /// Current query. String full_query; /// Current query as it was given to the client.
// Current query as it will be sent to the server. It may differ from the
// full query for INSERT queries, for which the data that follows the query
// is stripped and sent separately.
String query_to_send;
String format; /// Query results output format. String format; /// Query results output format.
bool is_default_format = true; /// false, if format is set in the config or command line. bool is_default_format = true; /// false, if format is set in the config or command line.
@ -177,10 +182,10 @@ private:
ASTPtr parsed_query; ASTPtr parsed_query;
/// The last exception that was received from the server. Is used for the return code in batch mode. /// The last exception that was received from the server. Is used for the return code in batch mode.
std::unique_ptr<Exception> last_exception; std::unique_ptr<Exception> last_exception_received_from_server;
/// If the last query resulted in exception. /// If the last query resulted in exception.
bool got_exception = false; bool received_exception_from_server = false;
int expected_server_error = 0; int expected_server_error = 0;
int expected_client_error = 0; int expected_client_error = 0;
int actual_server_error = 0; int actual_server_error = 0;
@ -616,7 +621,7 @@ private:
try try
{ {
if (!process(input)) if (!processQueryText(input))
break; break;
} }
catch (const Exception & e) catch (const Exception & e)
@ -657,8 +662,8 @@ private:
nonInteractive(); nonInteractive();
/// If exception code isn't zero, we should return non-zero return code anyway. /// If exception code isn't zero, we should return non-zero return code anyway.
if (last_exception) if (last_exception_received_from_server)
return last_exception->code() != 0 ? last_exception->code() : -1; return last_exception_received_from_server->code() != 0 ? last_exception_received_from_server->code() : -1;
return 0; return 0;
} }
@ -753,22 +758,31 @@ private:
readStringUntilEOF(text, in); readStringUntilEOF(text, in);
} }
process(text); processQueryText(text);
} }
bool processQueryText(const String & text)
bool process(const String & text)
{ {
if (exit_strings.end() != exit_strings.find(trim(text, [](char c){ return isWhitespaceASCII(c) || c == ';'; }))) if (exit_strings.end() != exit_strings.find(trim(text, [](char c){ return isWhitespaceASCII(c) || c == ';'; })))
return false; return false;
const bool test_mode = config().has("testmode"); if (!config().has("multiquery"))
if (config().has("multiquery"))
{ {
processTextAsSingleQuery(text);
return true;
}
return processMultiQuery(text);
}
bool processMultiQuery(const String & text)
{
const bool test_mode = config().has("testmode");
{ /// disable logs if expects errors { /// disable logs if expects errors
TestHint test_hint(test_mode, text); TestHint test_hint(test_mode, text);
if (test_hint.clientError() || test_hint.serverError()) if (test_hint.clientError() || test_hint.serverError())
process("SET send_logs_level = 'none'"); processTextAsSingleQuery("SET send_logs_level = 'none'");
} }
/// Several queries separated by ';'. /// Several queries separated by ';'.
@ -780,9 +794,9 @@ private:
while (begin < end) while (begin < end)
{ {
const char * pos = begin; const char * pos = begin;
ASTPtr ast = parseQuery(pos, end, true); ASTPtr orig_ast = parseQuery(pos, end, true);
if (!ast) if (!orig_ast)
{ {
if (ignore_error) if (ignore_error)
{ {
@ -797,7 +811,7 @@ private:
return true; return true;
} }
auto * insert = ast->as<ASTInsertQuery>(); auto * insert = orig_ast->as<ASTInsertQuery>();
if (insert && insert->data) if (insert && insert->data)
{ {
@ -817,26 +831,33 @@ private:
try try
{ {
auto ast_to_process = ast; auto ast_to_process = orig_ast;
if (insert && insert->data) if (insert && insert->data)
{
ast_to_process = nullptr; ast_to_process = nullptr;
processTextAsSingleQuery(str);
if (!processSingleQuery(str, ast_to_process) && !ignore_error) }
return false; else
{
parsed_query = ast_to_process;
full_query = str;
query_to_send = str;
processParsedSingleQuery();
}
} }
catch (...) catch (...)
{ {
last_exception = std::make_unique<Exception>(getCurrentExceptionMessage(true), getCurrentExceptionCode()); last_exception_received_from_server = std::make_unique<Exception>(getCurrentExceptionMessage(true), getCurrentExceptionCode());
actual_client_error = last_exception->code(); actual_client_error = last_exception_received_from_server->code();
if (!ignore_error && (!actual_client_error || actual_client_error != expected_client_error)) if (!ignore_error && (!actual_client_error || actual_client_error != expected_client_error))
std::cerr << "Error on processing query: " << str << std::endl << last_exception->message(); std::cerr << "Error on processing query: " << str << std::endl << last_exception_received_from_server->message();
got_exception = true; received_exception_from_server = true;
} }
if (!test_hint.checkActual(actual_server_error, actual_client_error, got_exception, last_exception)) if (!test_hint.checkActual(actual_server_error, actual_client_error, received_exception_from_server, last_exception_received_from_server))
connection->forceConnected(connection_parameters.timeouts); connection->forceConnected(connection_parameters.timeouts);
if (got_exception && !ignore_error) if (received_exception_from_server && !ignore_error)
{ {
if (is_interactive) if (is_interactive)
break; break;
@ -847,41 +868,53 @@ private:
return true; return true;
} }
void processTextAsSingleQuery(const String & text_)
{
full_query = text_;
/// Some parts of a query (result output and formatting) are executed
/// client-side. Thus we need to parse the query.
const char * begin = full_query.data();
parsed_query = parseQuery(begin, begin + full_query.size(), false);
if (!parsed_query)
return;
// An INSERT query may have the data that follow query text. Remove the
/// Send part of query without data, because data will be sent separately.
auto * insert = parsed_query->as<ASTInsertQuery>();
if (insert && insert->data)
{
query_to_send = full_query.substr(0, insert->data - full_query.data());
}
else else
{ {
return processSingleQuery(text); query_to_send = full_query;
}
} }
processParsedSingleQuery();
}
bool processSingleQuery(const String & line, ASTPtr parsed_query_ = nullptr) // Parameters are in global variables:
// 'parsed_query' -- the query AST,
// 'query_to_send' -- the query text that is sent to server,
// 'full_query' -- for INSERT queries, contains the query and the data that
// follow it. Its memory is referenced by ASTInsertQuery::begin, end.
void processParsedSingleQuery()
{ {
resetOutput(); resetOutput();
got_exception = false; received_exception_from_server = false;
if (echo_queries) if (echo_queries)
{ {
writeString(line, std_out); writeString(full_query, std_out);
writeChar('\n', std_out); writeChar('\n', std_out);
std_out.next(); std_out.next();
} }
watch.restart(); watch.restart();
query = line;
/// Some parts of a query (result output and formatting) are executed client-side.
/// Thus we need to parse the query.
parsed_query = parsed_query_;
if (!parsed_query)
{
const char * begin = query.data();
parsed_query = parseQuery(begin, begin + query.size(), false);
}
if (!parsed_query)
return true;
processed_rows = 0; processed_rows = 0;
progress.reset(); progress.reset();
show_progress_bar = false; show_progress_bar = false;
@ -924,7 +957,7 @@ private:
} }
/// Do not change context (current DB, settings) in case of an exception. /// Do not change context (current DB, settings) in case of an exception.
if (!got_exception) if (!received_exception_from_server)
{ {
if (const auto * set_query = parsed_query->as<ASTSetQuery>()) if (const auto * set_query = parsed_query->as<ASTSetQuery>())
{ {
@ -962,8 +995,6 @@ private:
{ {
std::cerr << watch.elapsedSeconds() << "\n"; std::cerr << watch.elapsedSeconds() << "\n";
} }
return true;
} }
@ -995,17 +1026,19 @@ private:
visitor.visit(parsed_query); visitor.visit(parsed_query);
/// Get new query after substitutions. Note that it cannot be done for INSERT query with embedded data. /// Get new query after substitutions. Note that it cannot be done for INSERT query with embedded data.
query = serializeAST(*parsed_query); query_to_send = serializeAST(*parsed_query);
} }
static constexpr size_t max_retries = 10; int retries_left = 10;
for (size_t retry = 0; retry < max_retries; ++retry) for (;;)
{ {
assert(retries_left > 0);
try try
{ {
connection->sendQuery( connection->sendQuery(
connection_parameters.timeouts, connection_parameters.timeouts,
query, query_to_send,
query_id, query_id,
QueryProcessingStage::Complete, QueryProcessingStage::Complete,
&context.getSettingsRef(), &context.getSettingsRef(),
@ -1019,31 +1052,34 @@ private:
} }
catch (const Exception & e) catch (const Exception & e)
{ {
/// Retry when the server said "Client should retry" and no rows has been received yet. /// Retry when the server said "Client should retry" and no rows
if (processed_rows == 0 && e.code() == ErrorCodes::DEADLOCK_AVOIDED && retry + 1 < max_retries) /// has been received yet.
continue; if (processed_rows == 0
&& e.code() == ErrorCodes::DEADLOCK_AVOIDED
&& --retries_left)
{
std::cerr << "Got a transient error from the server, will"
<< " retry (" << retries_left << " retries left)";
}
else
{
throw; throw;
} }
} }
} }
}
/// Process the query that requires transferring data blocks to the server. /// Process the query that requires transferring data blocks to the server.
void processInsertQuery() void processInsertQuery()
{ {
/// Send part of query without data, because data will be sent separately. const auto parsed_insert_query = parsed_query->as<ASTInsertQuery &>();
const auto & parsed_insert_query = parsed_query->as<ASTInsertQuery &>();
String query_without_data = parsed_insert_query.data
? query.substr(0, parsed_insert_query.data - query.data())
: query;
if (!parsed_insert_query.data && (is_interactive || (!stdin_is_a_tty && std_in.eof()))) if (!parsed_insert_query.data && (is_interactive || (!stdin_is_a_tty && std_in.eof())))
throw Exception("No data to insert", ErrorCodes::NO_DATA_TO_INSERT); throw Exception("No data to insert", ErrorCodes::NO_DATA_TO_INSERT);
connection->sendQuery( connection->sendQuery(
connection_parameters.timeouts, connection_parameters.timeouts,
query_without_data, query_to_send,
query_id, query_id,
QueryProcessingStage::Complete, QueryProcessingStage::Complete,
&context.getSettingsRef(), &context.getSettingsRef(),
@ -1310,8 +1346,8 @@ private:
return true; return true;
case Protocol::Server::Exception: case Protocol::Server::Exception:
onException(*packet.exception); onReceiveExceptionFromServer(*packet.exception);
last_exception = std::move(packet.exception); last_exception_received_from_server = std::move(packet.exception);
return false; return false;
case Protocol::Server::Log: case Protocol::Server::Log:
@ -1342,8 +1378,8 @@ private:
return true; return true;
case Protocol::Server::Exception: case Protocol::Server::Exception:
onException(*packet.exception); onReceiveExceptionFromServer(*packet.exception);
last_exception = std::move(packet.exception); last_exception_received_from_server = std::move(packet.exception);
return false; return false;
case Protocol::Server::Log: case Protocol::Server::Log:
@ -1376,8 +1412,8 @@ private:
return true; return true;
case Protocol::Server::Exception: case Protocol::Server::Exception:
onException(*packet.exception); onReceiveExceptionFromServer(*packet.exception);
last_exception = std::move(packet.exception); last_exception_received_from_server = std::move(packet.exception);
return false; return false;
case Protocol::Server::Log: case Protocol::Server::Log:
@ -1477,7 +1513,8 @@ private:
} }
else else
{ {
out_logs_buf = std::make_unique<WriteBufferFromFile>(server_logs_file, DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_APPEND | O_CREAT); out_logs_buf = std::make_unique<WriteBufferFromFile>(
server_logs_file, DBMS_DEFAULT_BUFFER_SIZE, O_WRONLY | O_APPEND | O_CREAT);
wb = out_logs_buf.get(); wb = out_logs_buf.get();
} }
} }
@ -1659,10 +1696,10 @@ private:
} }
void onException(const Exception & e) void onReceiveExceptionFromServer(const Exception & e)
{ {
resetOutput(); resetOutput();
got_exception = true; received_exception_from_server = true;
actual_server_error = e.code(); actual_server_error = e.code();
if (expected_server_error) if (expected_server_error)

View File

@ -39,12 +39,16 @@
#include <common/argsToConfig.h> #include <common/argsToConfig.h>
#include <Common/TerminalSize.h> #include <Common/TerminalSize.h>
#include <filesystem>
namespace DB namespace DB
{ {
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int BAD_ARGUMENTS;
extern const int LOGICAL_ERROR;
extern const int SYNTAX_ERROR; extern const int SYNTAX_ERROR;
extern const int CANNOT_LOAD_CONFIG; extern const int CANNOT_LOAD_CONFIG;
} }
@ -98,22 +102,55 @@ void LocalServer::applyCmdSettings()
/// If path is specified and not empty, will try to setup server environment and load existing metadata /// If path is specified and not empty, will try to setup server environment and load existing metadata
void LocalServer::tryInitPath() void LocalServer::tryInitPath()
{ {
std::string path = config().getString("path", ""); std::string path;
if (config().has("path"))
{
// User-supplied path.
path = config().getString("path");
Poco::trimInPlace(path); Poco::trimInPlace(path);
if (!path.empty()) if (path.empty())
{ {
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Cannot work with emtpy storage path that is explicitly specified"
" by the --path option. Please check the program options and"
" correct the --path.");
}
}
else
{
// Default unique path in the system temporary directory.
const auto tmp = std::filesystem::temp_directory_path();
const auto default_path = tmp
/ fmt::format("clickhouse-local-{}", getpid());
if (exists(default_path))
{
// This is a directory that is left by a previous run of
// clickhouse-local that had the same pid and did not complete
// correctly. Remove it, with an additional sanity check.
if (default_path.parent_path() != tmp)
{
throw Exception(ErrorCodes::LOGICAL_ERROR,
"The temporary directory of clickhouse-local '{}' is not"
" inside the system temporary directory '{}'. Will not delete"
" it", default_path.string(), tmp.string());
}
remove_all(default_path);
}
create_directory(default_path);
temporary_directory_to_delete = default_path;
path = default_path.string();
}
if (path.back() != '/') if (path.back() != '/')
path += '/'; path += '/';
context->setPath(path); context->setPath(path);
return;
}
/// In case of empty path set paths to helpful directories
std::string cd = Poco::Path::current();
context->setTemporaryStorage(cd + "tmp");
context->setFlagsPath(cd + "flags");
context->setUserFilesPath(""); // user's files are everywhere context->setUserFilesPath(""); // user's files are everywhere
} }
@ -228,10 +265,21 @@ try
context->shutdown(); context->shutdown();
context.reset(); context.reset();
cleanup();
return Application::EXIT_OK; return Application::EXIT_OK;
} }
catch (const Exception & e) catch (const Exception & e)
{ {
try
{
cleanup();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
std::cerr << getCurrentExceptionMessage(config().hasOption("stacktrace")) << '\n'; std::cerr << getCurrentExceptionMessage(config().hasOption("stacktrace")) << '\n';
/// If exception code isn't zero, we should return non-zero return code anyway. /// If exception code isn't zero, we should return non-zero return code anyway.
@ -372,6 +420,29 @@ void LocalServer::setupUsers()
throw Exception("Can't load config for users", ErrorCodes::CANNOT_LOAD_CONFIG); throw Exception("Can't load config for users", ErrorCodes::CANNOT_LOAD_CONFIG);
} }
void LocalServer::cleanup()
{
// Delete the temporary directory if needed. Just in case, check that it is
// in the system temporary directory, not to delete user data if there is a
// bug.
if (temporary_directory_to_delete)
{
const auto tmp = std::filesystem::temp_directory_path();
const auto dir = *temporary_directory_to_delete;
temporary_directory_to_delete.reset();
if (dir.parent_path() != tmp)
{
throw Exception(ErrorCodes::LOGICAL_ERROR,
"The temporary directory of clickhouse-local '{}' is not inside"
" the system temporary directory '{}'. Will not delete it",
dir.string(), tmp.string());
}
remove_all(dir);
}
}
static void showClientVersion() static void showClientVersion()
{ {
std::cout << DBMS_NAME << " client version " << VERSION_STRING << VERSION_OFFICIAL << "." << '\n'; std::cout << DBMS_NAME << " client version " << VERSION_STRING << VERSION_OFFICIAL << "." << '\n';

View File

@ -2,7 +2,9 @@
#include <Core/Settings.h> #include <Core/Settings.h>
#include <Poco/Util/Application.h> #include <Poco/Util/Application.h>
#include <filesystem>
#include <memory> #include <memory>
#include <optional>
#include <loggers/Loggers.h> #include <loggers/Loggers.h>
#include <Interpreters/Context.h> #include <Interpreters/Context.h>
@ -38,6 +40,7 @@ private:
void applyCmdSettings(); void applyCmdSettings();
void processQueries(); void processQueries();
void setupUsers(); void setupUsers();
void cleanup();
protected: protected:
SharedContextHolder shared_context; SharedContextHolder shared_context;
@ -45,6 +48,8 @@ protected:
/// Settings specified via command line args /// Settings specified via command line args
Settings cmd_settings; Settings cmd_settings;
std::optional<std::filesystem::path> temporary_directory_to_delete;
}; };
} }

View File

@ -14,7 +14,6 @@
#include <DataTypes/DataTypeNullable.h> #include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeFactory.h> #include <DataTypes/DataTypeFactory.h>
#include <Interpreters/Context.h> #include <Interpreters/Context.h>
#include <DataStreams/IBlockInputStream.h>
#include <DataStreams/IBlockOutputStream.h> #include <DataStreams/IBlockOutputStream.h>
#include <DataStreams/LimitBlockInputStream.h> #include <DataStreams/LimitBlockInputStream.h>
#include <Common/SipHash.h> #include <Common/SipHash.h>
@ -31,7 +30,6 @@
#include <ext/bit_cast.h> #include <ext/bit_cast.h>
#include <memory> #include <memory>
#include <cmath> #include <cmath>
#include <optional>
#include <unistd.h> #include <unistd.h>
#include <boost/program_options/options_description.hpp> #include <boost/program_options/options_description.hpp>
#include <boost/program_options.hpp> #include <boost/program_options.hpp>

View File

@ -10,7 +10,6 @@ set (CLICKHOUSE_ODBC_BRIDGE_SOURCES
PingHandler.cpp PingHandler.cpp
validateODBCConnectionString.cpp validateODBCConnectionString.cpp
) )
set (CLICKHOUSE_ODBC_BRIDGE_LINK set (CLICKHOUSE_ODBC_BRIDGE_LINK
PRIVATE PRIVATE
clickhouse_parsers clickhouse_parsers

View File

@ -431,6 +431,8 @@ int Server::main(const std::vector<std::string> & /*args*/)
DateLUT::instance(); DateLUT::instance();
LOG_TRACE(log, "Initialized DateLUT with time zone '{}'.", DateLUT::instance().getTimeZone()); LOG_TRACE(log, "Initialized DateLUT with time zone '{}'.", DateLUT::instance().getTimeZone());
/// Initialize global thread pool
GlobalThreadPool::initialize(config().getUInt("max_thread_pool_size", 10000));
/// Storage with temporary data for processing of heavy queries. /// Storage with temporary data for processing of heavy queries.
{ {
@ -647,12 +649,22 @@ int Server::main(const std::vector<std::string> & /*args*/)
if (max_server_memory_usage == 0) if (max_server_memory_usage == 0)
{ {
max_server_memory_usage = default_max_server_memory_usage; max_server_memory_usage = default_max_server_memory_usage;
LOG_INFO(log, "Setting max_server_memory_usage was set to {}", formatReadableSizeWithBinarySuffix(max_server_memory_usage)); LOG_INFO(log, "Setting max_server_memory_usage was set to {}"
" ({} available * {:.2f} max_server_memory_usage_to_ram_ratio)",
formatReadableSizeWithBinarySuffix(max_server_memory_usage),
formatReadableSizeWithBinarySuffix(memory_amount),
max_server_memory_usage_to_ram_ratio);
} }
else if (max_server_memory_usage > default_max_server_memory_usage) else if (max_server_memory_usage > default_max_server_memory_usage)
{ {
max_server_memory_usage = default_max_server_memory_usage; max_server_memory_usage = default_max_server_memory_usage;
LOG_INFO(log, "Setting max_server_memory_usage was lowered to {} because the system has low amount of memory", formatReadableSizeWithBinarySuffix(max_server_memory_usage)); LOG_INFO(log, "Setting max_server_memory_usage was lowered to {}"
" because the system has low amount of memory. The amount was"
" calculated as {} available"
" * {:.2f} max_server_memory_usage_to_ram_ratio",
formatReadableSizeWithBinarySuffix(max_server_memory_usage),
formatReadableSizeWithBinarySuffix(memory_amount),
max_server_memory_usage_to_ram_ratio);
} }
total_memory_tracker.setOrRaiseHardLimit(max_server_memory_usage); total_memory_tracker.setOrRaiseHardLimit(max_server_memory_usage);
@ -849,7 +861,8 @@ int Server::main(const std::vector<std::string> & /*args*/)
}; };
/// This object will periodically calculate some metrics. /// This object will periodically calculate some metrics.
AsynchronousMetrics async_metrics(*global_context); AsynchronousMetrics async_metrics(*global_context,
config().getUInt("asynchronous_metrics_update_period_s", 60));
attachSystemTablesAsync(*DatabaseCatalog::instance().getSystemDatabase(), async_metrics); attachSystemTablesAsync(*DatabaseCatalog::instance().getSystemDatabase(), async_metrics);
for (const auto & listen_host : listen_hosts) for (const auto & listen_host : listen_hosts)

View File

@ -45,6 +45,18 @@
--> -->
</logger> </logger>
<send_crash_reports>
<!-- Changing <enabled> to true allows sending crash reports to -->
<!-- the ClickHouse core developers team via Sentry https://sentry.io -->
<!-- Doing so at least in pre-production environments is highly appreciated -->
<enabled>false</enabled>
<!-- Change <anonymize> to true if you don't feel comfortable attaching the server hostname to the crash report -->
<anonymize>false</anonymize>
<!-- Default endpoint should be changed to different Sentry DSN only if you have -->
<!-- some in-house engineers or hired consultants who're going to debug ClickHouse issues for you -->
<endpoint>https://6f33034cfe684dd7a3ab9875e57b1c8d@o388870.ingest.sentry.io/5226277</endpoint>
</send_crash_reports>
<!--display_name>production</display_name--> <!-- It is the name that will be shown in the client --> <!--display_name>production</display_name--> <!-- It is the name that will be shown in the client -->
<http_port>8123</http_port> <http_port>8123</http_port>
<tcp_port>9000</tcp_port> <tcp_port>9000</tcp_port>
@ -136,6 +148,15 @@
--> -->
<max_server_memory_usage>0</max_server_memory_usage> <max_server_memory_usage>0</max_server_memory_usage>
<!-- Maximum number of threads in the Global thread pool.
This will default to a maximum of 10000 threads if not specified.
This setting will be useful in scenarios where there are a large number
of distributed queries that are running concurrently but are idling most
of the time, in which case a higher number of threads might be required.
-->
<max_thread_pool_size>10000</max_thread_pool_size>
<!-- On memory constrained environments you may have to set this to value larger than 1. <!-- On memory constrained environments you may have to set this to value larger than 1.
--> -->
<max_server_memory_usage_to_ram_ratio>0.9</max_server_memory_usage_to_ram_ratio> <max_server_memory_usage_to_ram_ratio>0.9</max_server_memory_usage_to_ram_ratio>

View File

@ -133,6 +133,7 @@ enum class AccessType
M(SYSTEM_REPLICATED_SENDS, "SYSTEM STOP REPLICATED SENDS, SYSTEM START REPLICATED SENDS, STOP_REPLICATED_SENDS, START REPLICATED SENDS", TABLE, SYSTEM_SENDS) \ M(SYSTEM_REPLICATED_SENDS, "SYSTEM STOP REPLICATED SENDS, SYSTEM START REPLICATED SENDS, STOP_REPLICATED_SENDS, START REPLICATED SENDS", TABLE, SYSTEM_SENDS) \
M(SYSTEM_SENDS, "SYSTEM STOP SENDS, SYSTEM START SENDS, STOP SENDS, START SENDS", GROUP, SYSTEM) \ M(SYSTEM_SENDS, "SYSTEM STOP SENDS, SYSTEM START SENDS, STOP SENDS, START SENDS", GROUP, SYSTEM) \
M(SYSTEM_REPLICATION_QUEUES, "SYSTEM STOP REPLICATION QUEUES, SYSTEM START REPLICATION QUEUES, STOP_REPLICATION_QUEUES, START REPLICATION QUEUES", TABLE, SYSTEM) \ M(SYSTEM_REPLICATION_QUEUES, "SYSTEM STOP REPLICATION QUEUES, SYSTEM START REPLICATION QUEUES, STOP_REPLICATION_QUEUES, START REPLICATION QUEUES", TABLE, SYSTEM) \
M(SYSTEM_DROP_REPLICA, "DROP REPLICA", TABLE, SYSTEM) \
M(SYSTEM_SYNC_REPLICA, "SYNC REPLICA", TABLE, SYSTEM) \ M(SYSTEM_SYNC_REPLICA, "SYNC REPLICA", TABLE, SYSTEM) \
M(SYSTEM_RESTART_REPLICA, "RESTART REPLICA", TABLE, SYSTEM) \ M(SYSTEM_RESTART_REPLICA, "RESTART REPLICA", TABLE, SYSTEM) \
M(SYSTEM_FLUSH_DISTRIBUTED, "FLUSH DISTRIBUTED", TABLE, SYSTEM_FLUSH) \ M(SYSTEM_FLUSH_DISTRIBUTED, "FLUSH DISTRIBUTED", TABLE, SYSTEM_FLUSH) \

View File

@ -93,7 +93,7 @@ public:
buf.read(c); buf.read(c);
} }
void insertResultInto(AggregateDataPtr, IColumn & to) const override void insertResultInto(AggregateDataPtr, IColumn & to, Arena *) const override
{ {
to.insertDefault(); to.insertDefault();
} }

View File

@ -85,7 +85,7 @@ public:
return Data::allocatesMemoryInArena(); return Data::allocatesMemoryInArena();
} }
void insertResultInto(AggregateDataPtr place, IColumn & to) const override void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
{ {
this->data(place).result.insertResultInto(to); this->data(place).result.insertResultInto(to);
} }

View File

@ -119,9 +119,9 @@ public:
nested_func->deserialize(place, buf, arena); nested_func->deserialize(place, buf, arena);
} }
void insertResultInto(AggregateDataPtr place, IColumn & to) const override void insertResultInto(AggregateDataPtr place, IColumn & to, Arena * arena) const override
{ {
nested_func->insertResultInto(place, to); nested_func->insertResultInto(place, to, arena);
} }
bool allocatesMemoryInArena() const override bool allocatesMemoryInArena() const override

View File

@ -80,7 +80,7 @@ public:
readBinary(this->data(place).denominator, buf); readBinary(this->data(place).denominator, buf);
} }
void insertResultInto(AggregateDataPtr place, IColumn & to) const override void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
{ {
auto & column = static_cast<ColVecResult &>(to); auto & column = static_cast<ColVecResult &>(to);
column.getData().push_back(this->data(place).template result<ResultType>()); column.getData().push_back(this->data(place).template result<ResultType>());

View File

@ -74,7 +74,7 @@ public:
readBinary(this->data(place).value, buf); readBinary(this->data(place).value, buf);
} }
void insertResultInto(AggregateDataPtr place, IColumn & to) const override void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
{ {
assert_cast<ColumnVector<T> &>(to).getData().push_back(this->data(place).value); assert_cast<ColumnVector<T> &>(to).getData().push_back(this->data(place).value);
} }

View File

@ -150,7 +150,7 @@ public:
data(place).deserialize(buf); data(place).deserialize(buf);
} }
void insertResultInto(AggregateDataPtr place, IColumn & to) const override void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
{ {
assert_cast<ColumnFloat64 &>(to).getData().push_back(getBoundingRatio(data(place))); assert_cast<ColumnFloat64 &>(to).getData().push_back(getBoundingRatio(data(place)));
} }

View File

@ -119,8 +119,8 @@ public:
void insertResultInto( void insertResultInto(
AggregateDataPtr place, AggregateDataPtr place,
IColumn & to IColumn & to,
) const override Arena *) const override
{ {
auto & col = static_cast<ColumnArray &>(to); auto & col = static_cast<ColumnArray &>(to);
auto & data_col = static_cast<ColumnFloat64 &>(col.getData()); auto & data_col = static_cast<ColumnFloat64 &>(col.getData());

View File

@ -57,7 +57,7 @@ public:
readVarUInt(data(place).count, buf); readVarUInt(data(place).count, buf);
} }
void insertResultInto(AggregateDataPtr place, IColumn & to) const override void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
{ {
assert_cast<ColumnUInt64 &>(to).getData().push_back(data(place).count); assert_cast<ColumnUInt64 &>(to).getData().push_back(data(place).count);
} }
@ -112,7 +112,7 @@ public:
readVarUInt(data(place).count, buf); readVarUInt(data(place).count, buf);
} }
void insertResultInto(AggregateDataPtr place, IColumn & to) const override void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override
{ {
assert_cast<ColumnUInt64 &>(to).getData().push_back(data(place).count); assert_cast<ColumnUInt64 &>(to).getData().push_back(data(place).count);
} }

Some files were not shown because too many files have changed in this diff Show More