Merge remote-tracking branch 'origin/master' into HEAD

This commit is contained in:
Alexander Kuzmenkov 2020-05-25 06:21:27 +03:00
commit f1fb7245e5
460 changed files with 5797 additions and 3093 deletions

12
.arcignore Normal file
View File

@ -0,0 +1,12 @@
# .arcignore is the same as .gitignore but for Arc VCS.
# Arc VCS is a proprietary VCS in Yandex that is very similar to Git
# from the user perspective but with the following differences:
# 1. Data is stored in distributed object storage.
# 2. Local copy works via FUSE without downloading all the objects.
# For this reason, it is better suited for huge monorepositories that can be found in large companies (e.g. Yandex, Google).
# As ClickHouse developers, we don't use Arc as a VCS (we use Git).
# But the ClickHouse source code is also mirrored into an internal monorepository and our colleagues are using Arc.
# You can read more about Arc here: https://habr.com/en/company/yandex/blog/482926/
# Repository is synchronized without 3rd-party submodules.
contrib

View File

@ -9,7 +9,7 @@ Checks: '-*,
misc-unused-alias-decls,
misc-unused-parameters,
misc-unused-using-decls,
modernize-avoid-bind,
modernize-loop-convert,
modernize-make-shared,
@ -33,7 +33,7 @@ Checks: '-*,
performance-no-automatic-move,
performance-trivially-destructible,
performance-unnecessary-copy-initialization,
readability-avoid-const-params-in-decls,
readability-const-return-type,
readability-container-size-empty,
@ -58,7 +58,7 @@ Checks: '-*,
readability-simplify-boolean-expr,
readability-inconsistent-declaration-parameter-name,
readability-identifier-naming,
bugprone-undelegated-constructor,
bugprone-argument-comment,
bugprone-bad-signal-to-kill-thread,
@ -102,7 +102,7 @@ Checks: '-*,
bugprone-unused-return-value,
bugprone-use-after-move,
bugprone-virtual-near-miss,
cert-dcl21-cpp,
cert-dcl50-cpp,
cert-env33-c,
@ -112,7 +112,7 @@ Checks: '-*,
cert-mem57-cpp,
cert-msc50-cpp,
cert-oop58-cpp,
google-build-explicit-make-pair,
google-build-namespaces,
google-default-arguments,
@ -121,9 +121,9 @@ Checks: '-*,
google-readability-avoid-underscore-in-googletest-name,
google-runtime-int,
google-runtime-operator,
hicpp-exception-baseclass,
clang-analyzer-core.CallAndMessage,
clang-analyzer-core.DivideZero,
clang-analyzer-core.NonNullParamChecker,

View File

@ -9,7 +9,7 @@ Changelog category (leave one):
- Build/Testing/Packaging Improvement
- Documentation (changelog entry is not required)
- Other
- Non-significant (changelog entry is not required)
- Not for changelog (changelog entry is not required)
Changelog entry (a user-readable short description of the changes that goes to CHANGELOG.md):

3
.gitmodules vendored
View File

@ -157,3 +157,6 @@
[submodule "contrib/openldap"]
path = contrib/openldap
url = https://github.com/openldap/openldap.git
[submodule "contrib/fmtlib"]
path = contrib/fmtlib
url = https://github.com/fmtlib/fmt.git

View File

@ -79,6 +79,7 @@ target_link_libraries (common
Poco::Util
Poco::Foundation
replxx
fmt
PRIVATE
cctz

View File

@ -2,16 +2,14 @@
/// Macros for convenient usage of Poco logger.
#include <sstream>
#include <fmt/format.h>
#include <fmt/ostream.h>
#include <Poco/Logger.h>
#include <Poco/Message.h>
#include <Poco/Version.h>
#include <Common/CurrentThread.h>
#ifndef QUERY_PREVIEW_LENGTH
#define QUERY_PREVIEW_LENGTH 160
#endif
/// TODO Remove this.
using Poco::Logger;
using Poco::Message;
using DB::LogsLevel;
@ -19,21 +17,20 @@ using DB::CurrentThread;
/// Logs a message to a specified logger with that level.
#define LOG_SIMPLE(logger, message, priority, PRIORITY) do \
#define LOG_IMPL(logger, priority, PRIORITY, ...) do \
{ \
const bool is_clients_log = (CurrentThread::getGroup() != nullptr) && \
(CurrentThread::getGroup()->client_logs_level >= (priority)); \
if ((logger)->is((PRIORITY)) || is_clients_log) \
{ \
std::stringstream oss_internal_rare; \
oss_internal_rare << message; \
std::string formatted_message = fmt::format(__VA_ARGS__); \
if (auto channel = (logger)->getChannel()) \
{ \
std::string file_function; \
file_function += __FILE__; \
file_function += "; "; \
file_function += __PRETTY_FUNCTION__; \
Message poco_message((logger)->name(), oss_internal_rare.str(), \
Message poco_message((logger)->name(), formatted_message, \
(PRIORITY), file_function.c_str(), __LINE__); \
channel->log(poco_message); \
} \
@ -41,10 +38,9 @@ using DB::CurrentThread;
} while (false)
#define LOG_TRACE(logger, message) LOG_SIMPLE(logger, message, LogsLevel::trace, Message::PRIO_TRACE)
#define LOG_DEBUG(logger, message) LOG_SIMPLE(logger, message, LogsLevel::debug, Message::PRIO_DEBUG)
#define LOG_INFO(logger, message) LOG_SIMPLE(logger, message, LogsLevel::information, Message::PRIO_INFORMATION)
#define LOG_WARNING(logger, message) LOG_SIMPLE(logger, message, LogsLevel::warning, Message::PRIO_WARNING)
#define LOG_ERROR(logger, message) LOG_SIMPLE(logger, message, LogsLevel::error, Message::PRIO_ERROR)
#define LOG_FATAL(logger, message) LOG_SIMPLE(logger, message, LogsLevel::error, Message::PRIO_FATAL)
/// Level-specific logging macros. Everything after `logger` is handed to LOG_IMPL,
/// which passes it to fmt::format: a format string followed by its arguments,
/// e.g. LOG_TRACE(log, "Received signal {}", sig).
/// NOTE: LOG_FATAL pairs Message::PRIO_FATAL with LogsLevel::error for the client logs level check.
#define LOG_TRACE(logger, ...) LOG_IMPL(logger, LogsLevel::trace, Message::PRIO_TRACE, __VA_ARGS__)
#define LOG_DEBUG(logger, ...) LOG_IMPL(logger, LogsLevel::debug, Message::PRIO_DEBUG, __VA_ARGS__)
#define LOG_INFO(logger, ...) LOG_IMPL(logger, LogsLevel::information, Message::PRIO_INFORMATION, __VA_ARGS__)
#define LOG_WARNING(logger, ...) LOG_IMPL(logger, LogsLevel::warning, Message::PRIO_WARNING, __VA_ARGS__)
#define LOG_ERROR(logger, ...) LOG_IMPL(logger, LogsLevel::error, Message::PRIO_ERROR, __VA_ARGS__)
#define LOG_FATAL(logger, ...) LOG_IMPL(logger, LogsLevel::error, Message::PRIO_FATAL, __VA_ARGS__)

View File

@ -24,6 +24,7 @@ PEERDIR(
contrib/libs/cxxsupp/libcxx-filesystem
contrib/libs/poco/Net
contrib/libs/poco/Util
contrib/libs/fmt
contrib/restricted/boost
contrib/restricted/cityhash-1.0.2
)

View File

@ -180,7 +180,7 @@ public:
// levels and more info, but for completeness we log all signals
// here at trace level.
// Don't use strsignal here, because it's not thread-safe.
LOG_TRACE(log, "Received signal " << sig);
LOG_TRACE(log, "Received signal {}", sig);
if (sig == Signals::StopThread)
{
@ -236,7 +236,7 @@ private:
void onTerminate(const std::string & message, UInt32 thread_num) const
{
LOG_FATAL(log, "(version " << VERSION_STRING << VERSION_OFFICIAL << ") (from thread " << thread_num << ") " << message);
LOG_FATAL(log, "(version {}{}) (from thread {}) {}", VERSION_STRING, VERSION_OFFICIAL, thread_num, message);
}
void onFault(
@ -257,9 +257,9 @@ private:
message << " (no query)";
else
message << " (query_id: " << query_id << ")";
message << " Received signal " << strsignal(sig) << " (" << sig << ")" << ".";
message << " Received signal " << strsignal(sig) << " (" << sig << ").";
LOG_FATAL(log, message.rdbuf());
LOG_FATAL(log, message.str());
}
LOG_FATAL(log, signalToErrorMessage(sig, info, context));
@ -274,7 +274,7 @@ private:
for (size_t i = stack_trace.getOffset(); i < stack_trace.getSize(); ++i)
bare_stacktrace << ' ' << stack_trace.getFrames()[i];
LOG_FATAL(log, bare_stacktrace.rdbuf());
LOG_FATAL(log, bare_stacktrace.str());
}
/// Write symbolized stack trace line by line for better grep-ability.
@ -302,7 +302,7 @@ static void sanitizerDeathCallback()
message << " (query_id: " << query_id << ")";
message << " Sanitizer trap.";
LOG_FATAL(log, message.rdbuf());
LOG_FATAL(log, message.str());
}
/// Just in case print our own stack trace. In case when llvm-symbolizer does not work.
@ -314,7 +314,7 @@ static void sanitizerDeathCallback()
for (size_t i = stack_trace.getOffset(); i < stack_trace.getSize(); ++i)
bare_stacktrace << ' ' << stack_trace.getFrames()[i];
LOG_FATAL(log, bare_stacktrace.rdbuf());
LOG_FATAL(log, bare_stacktrace.str());
}
/// Write symbolized stack trace line by line for better grep-ability.
@ -379,7 +379,7 @@ static bool tryCreateDirectories(Poco::Logger * logger, const std::string & path
}
catch (...)
{
LOG_WARNING(logger, __PRETTY_FUNCTION__ << ": when creating " << path << ", " << DB::getCurrentExceptionMessage(true));
LOG_WARNING(logger, "{}: when creating {}, {}", __PRETTY_FUNCTION__, path, DB::getCurrentExceptionMessage(true));
}
return false;
}
@ -498,11 +498,10 @@ void debugIncreaseOOMScore()
}
catch (const Poco::Exception & e)
{
LOG_WARNING(&Logger::root(), "Failed to adjust OOM score: '" +
e.displayText() + "'.");
LOG_WARNING(&Logger::root(), "Failed to adjust OOM score: '{}'.", e.displayText());
return;
}
LOG_INFO(&Logger::root(), "Set OOM score adjustment to " + new_score);
LOG_INFO(&Logger::root(), "Set OOM score adjustment to {}", new_score);
}
#else
void debugIncreaseOOMScore() {}
@ -734,7 +733,7 @@ void BaseDaemon::handleNotification(Poco::TaskFailedNotification *_tfn)
task_failed = true;
Poco::AutoPtr<Poco::TaskFailedNotification> fn(_tfn);
Logger *lg = &(logger());
LOG_ERROR(lg, "Task '" << fn->task()->name() << "' failed. Daemon is shutting down. Reason - " << fn->reason().displayText());
LOG_ERROR(lg, "Task '{}' failed. Daemon is shutting down. Reason - {}", fn->task()->name(), fn->reason().displayText());
ServerApplication::terminate();
}
@ -850,7 +849,7 @@ void BaseDaemon::handleSignal(int signal_id)
void BaseDaemon::onInterruptSignals(int signal_id)
{
is_cancelled = true;
LOG_INFO(&logger(), "Received termination signal (" << strsignal(signal_id) << ")");
LOG_INFO(&logger(), "Received termination signal ({})", strsignal(signal_id));
if (sigint_signals_counter >= 2)
{

View File

@ -52,8 +52,7 @@ private:
}
catch (const Poco::Exception & e)
{
LOG_WARNING(&Poco::Util::Application::instance().logger(),
"Fail to write to Graphite " << host << ":" << port << ". e.what() = " << e.what() << ", e.message() = " << e.message());
LOG_WARNING(&Poco::Util::Application::instance().logger(), "Fail to write to Graphite {}:{}. e.what() = {}, e.message() = {}", host, port, e.what(), e.message());
}
}

View File

@ -4,6 +4,7 @@
#include <ctime>
#include <string>
#include <iomanip>
#include <sstream>
namespace ext

View File

@ -162,4 +162,10 @@ elseif (COMPILER_GCC)
add_cxx_compile_options(-Wunused)
# Warn if vector operation is not implemented via SIMD capabilities of the architecture
add_cxx_compile_options(-Wvector-operation-performance)
# XXX: gcc10 gets stuck with -Wsequence-point while compiling GatherUtils code
# (anyway there are builds with clang, that will warn)
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 10)
add_cxx_compile_options(-Wno-sequence-point)
endif()
endif ()

View File

@ -317,3 +317,5 @@ endif()
if (USE_FASTOPS)
add_subdirectory (fastops-cmake)
endif()
add_subdirectory (fmtlib-cmake)

1
contrib/fmtlib vendored Submodule

@ -0,0 +1 @@
Subproject commit 297c3b2ed551a4989826fc8c4780bf533e964bd9

View File

@ -0,0 +1,20 @@
# Sources and headers of the fmt library, taken from the sibling ../fmtlib directory.
set (SRCS
../fmtlib/src/format.cc
../fmtlib/src/os.cc
../fmtlib/include/fmt/chrono.h
../fmtlib/include/fmt/color.h
../fmtlib/include/fmt/compile.h
../fmtlib/include/fmt/core.h
../fmtlib/include/fmt/format.h
../fmtlib/include/fmt/format-inl.h
../fmtlib/include/fmt/locale.h
../fmtlib/include/fmt/os.h
../fmtlib/include/fmt/ostream.h
../fmtlib/include/fmt/posix.h
../fmtlib/include/fmt/printf.h
../fmtlib/include/fmt/ranges.h
)
# Build the `fmt` library target from the sources listed above.
add_library(fmt ${SRCS})
# Export the headers to dependents; SYSTEM marks them as system include directories.
target_include_directories(fmt SYSTEM PUBLIC ../fmtlib/include)

View File

@ -9,6 +9,17 @@ if (ENABLE_JEMALLOC)
option (USE_INTERNAL_JEMALLOC "Use internal jemalloc library" ${NOT_UNBUNDLED})
if (USE_INTERNAL_JEMALLOC)
# ThreadPool selects jobs randomly, so there can be threads that have performed
# some memory-heavy task and will then be inactive for some time. Until such a
# thread becomes active again, its memory will not be freed, since by default
# each thread has its own arena (although there should be no more than
# 4*CPU arenas, see the opt.narenas description).
#
# Enabling percpu_arena limits the number of arenas to the number of CPUs, and
# hence this problem should go away.
set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu" CACHE STRING "Change default configuration string of JEMalloc" )
message (STATUS "jemalloc malloc_conf: ${JEMALLOC_CONFIG_MALLOC_CONF}")
set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/jemalloc")
set (SRCS
@ -52,11 +63,19 @@ if (ENABLE_JEMALLOC)
add_library(jemalloc ${SRCS})
target_include_directories(jemalloc PRIVATE ${LIBRARY_DIR}/include)
target_include_directories(jemalloc SYSTEM PUBLIC include)
set(JEMALLOC_INCLUDE)
if (ARCH_AMD64)
target_include_directories(jemalloc SYSTEM PUBLIC include_linux_x86_64)
set(JEMALLOC_INCLUDE_PREFIX include_linux_x86_64)
elseif (ARCH_ARM)
target_include_directories(jemalloc SYSTEM PUBLIC include_linux_aarch64)
set(JEMALLOC_INCLUDE_PREFIX include_linux_aarch64)
endif ()
target_include_directories(jemalloc SYSTEM PUBLIC
${JEMALLOC_INCLUDE_PREFIX})
configure_file(${JEMALLOC_INCLUDE_PREFIX}/jemalloc/internal/jemalloc_internal_defs.h.in
${JEMALLOC_INCLUDE_PREFIX}/jemalloc/internal/jemalloc_internal_defs.h)
target_include_directories(jemalloc SYSTEM PRIVATE
${CMAKE_CURRENT_BINARY_DIR}/${JEMALLOC_INCLUDE_PREFIX}/jemalloc/internal)
target_compile_definitions(jemalloc PRIVATE -DJEMALLOC_NO_PRIVATE_NAMESPACE)

View File

@ -5,3 +5,4 @@ Added #define GNU_SOURCE
Added JEMALLOC_OVERRIDE___POSIX_MEMALIGN because why not.
Removed JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF because it's non standard.
Removed JEMALLOC_PURGE_MADVISE_FREE because it's available only from Linux 4.5.
Added JEMALLOC_CONFIG_MALLOC_CONF substitution

View File

@ -369,7 +369,7 @@
/* #undef JEMALLOC_EXPORT */
/* config.malloc_conf options string. */
#define JEMALLOC_CONFIG_MALLOC_CONF ""
#define JEMALLOC_CONFIG_MALLOC_CONF "@JEMALLOC_CONFIG_MALLOC_CONF@"
/* If defined, jemalloc takes the malloc/free/etc. symbol names. */
#define JEMALLOC_IS_MALLOC 1

View File

@ -5,3 +5,4 @@ Added #define GNU_SOURCE
Added JEMALLOC_OVERRIDE___POSIX_MEMALIGN because why not.
Removed JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF because it's non standard.
Removed JEMALLOC_PURGE_MADVISE_FREE because it's available only from Linux 4.5.
Added JEMALLOC_CONFIG_MALLOC_CONF substitution

View File

@ -360,7 +360,7 @@
/* #undef JEMALLOC_EXPORT */
/* config.malloc_conf options string. */
#define JEMALLOC_CONFIG_MALLOC_CONF ""
#define JEMALLOC_CONFIG_MALLOC_CONF "@JEMALLOC_CONFIG_MALLOC_CONF@"
/* If defined, jemalloc takes the malloc/free/etc. symbol names. */
#define JEMALLOC_IS_MALLOC 1

11
debian/rules vendored
View File

@ -24,6 +24,10 @@ DEB_BUILD_OPTIONS+=parallel=$(THREADS_COUNT)
ifndef ENABLE_TESTS
CMAKE_FLAGS += -DENABLE_TESTS=0
else
# To export binaries from the deb build we do not strip them. There is no need to run tests in the deb build, as we run them in CI.
DEB_BUILD_OPTIONS+= nocheck
DEB_BUILD_OPTIONS+= nostrip
endif
ifndef MAKE_TARGET
@ -88,14 +92,19 @@ override_dh_auto_build:
$(MAKE) $(THREADS_COUNT) -C $(BUILDDIR) $(MAKE_TARGET)
override_dh_auto_test:
ifeq (,$(filter nocheck,$(DEB_BUILD_OPTIONS)))
cd $(BUILDDIR) && ctest $(THREADS_COUNT) -V -E with_server
endif
override_dh_clean:
rm -rf debian/copyright debian/clickhouse-client.docs debian/clickhouse-common-static.docs
dh_clean -X contrib
dh_clean # -X contrib
override_dh_strip:
#https://www.debian.org/doc/debian-policy/ch-source.html#debian-rules-and-deb-build-options
ifeq (,$(filter nostrip,$(DEB_BUILD_OPTIONS)))
dh_strip -pclickhouse-common-static --dbg-package=clickhouse-common-static-dbg
endif
override_dh_install:
# Making docs

View File

@ -5,6 +5,7 @@ RUN apt-get --allow-unauthenticated update -y && apt-get install --yes wget gnup
RUN wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add -
RUN echo "deb [trusted=yes] http://apt.llvm.org/eoan/ llvm-toolchain-eoan-10 main" >> /etc/apt/sources.list
RUN apt-get --allow-unauthenticated update -y \
&& env DEBIAN_FRONTEND=noninteractive \
apt-get --allow-unauthenticated install --yes --no-install-recommends \
@ -17,6 +18,14 @@ RUN apt-get --allow-unauthenticated update -y \
apt-transport-https \
ca-certificates
# Special dpkg-deb (https://github.com/ClickHouse-Extras/dpkg) version which is able
# to compress files using pigz (https://zlib.net/pigz/) instead of gzip.
# Significantly increase deb packaging speed and compatible with old systems
RUN curl -O https://clickhouse-builds.s3.yandex.net/utils/dpkg-deb
RUN chmod +x dpkg-deb
RUN cp dpkg-deb /usr/bin
# Libraries from OS are only needed to test the "unbundled" build (that is not used in production).
RUN apt-get --allow-unauthenticated update -y \
&& env DEBIAN_FRONTEND=noninteractive \
@ -74,12 +83,6 @@ RUN apt-get --allow-unauthenticated update -y \
libldap2-dev
# Special dpkg-deb (https://github.com/ClickHouse-Extras/dpkg) version which is able
# to compress files using pigz (https://zlib.net/pigz/) instead of gzip.
# Significantly increase deb packaging speed and compatible with old systems
RUN curl -O https://clickhouse-builds.s3.yandex.net/utils/dpkg-deb
RUN chmod +x dpkg-deb
RUN cp dpkg-deb /usr/bin
# This symlink required by gcc to find lld compiler
RUN ln -s /usr/bin/lld-10 /usr/bin/ld.lld

View File

@ -10,5 +10,16 @@ mv *.changes /output
mv *.buildinfo /output
mv /*.rpm /output ||: # if exists
mv /*.tgz /output ||: # if exists
if [ -n "$BINARY_OUTPUT" ] && { [ "$BINARY_OUTPUT" = "programs" ] || [ "$BINARY_OUTPUT" = "tests" ] ;}
then
echo Place $BINARY_OUTPUT to output
mkdir /output/binary ||: # if exists
mv /build/obj-*/programs/clickhouse* /output/binary
if [ "$BINARY_OUTPUT" = "tests" ]
then
mv /build/obj-*/src/unit_tests_dbms /output/binary
fi
fi
ccache --show-stats ||:
ln -s /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/libOpenCL.so ||:

View File

@ -54,7 +54,7 @@ def run_docker_image_with_env(image_name, output, env_variables, ch_root, ccache
subprocess.check_call(cmd, shell=True)
def parse_env_variables(build_type, compiler, sanitizer, package_type, image_type, cache, distcc_hosts, unbundled, split_binary, clang_tidy, version, author, official, alien_pkgs, with_coverage):
def parse_env_variables(build_type, compiler, sanitizer, package_type, image_type, cache, distcc_hosts, unbundled, split_binary, clang_tidy, version, author, official, alien_pkgs, with_coverage, with_binaries):
CLANG_PREFIX = "clang"
DARWIN_SUFFIX = "-darwin"
ARM_SUFFIX = "-aarch64"
@ -131,6 +131,14 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ
if alien_pkgs:
result.append("ALIEN_PKGS='" + ' '.join(['--' + pkg for pkg in alien_pkgs]) + "'")
if with_binaries == "programs":
result.append('BINARY_OUTPUT=programs')
elif with_binaries == "tests":
result.append('ENABLE_TESTS=1')
result.append('BINARY_OUTPUT=tests')
cmake_flags.append('-DENABLE_TESTS=1')
cmake_flags.append('-DUSE_GTEST=1')
if unbundled:
# TODO: fix build with ENABLE_RDKAFKA
cmake_flags.append('-DUNBUNDLED=1 -DENABLE_MYSQL=0 -DENABLE_ODBC=0 -DENABLE_REPLXX=0 -DENABLE_RDKAFKA=0')
@ -179,6 +187,7 @@ if __name__ == "__main__":
parser.add_argument("--official", action="store_true")
parser.add_argument("--alien-pkgs", nargs='+', default=[])
parser.add_argument("--with-coverage", action="store_true")
parser.add_argument("--with-binaries", choices=("programs", "tests", ""), default="")
args = parser.parse_args()
if not os.path.isabs(args.output_dir):
@ -195,6 +204,12 @@ if __name__ == "__main__":
if args.alien_pkgs and not image_type == "deb":
raise Exception("Can add alien packages only in deb build")
if args.with_binaries != "" and not image_type == "deb":
raise Exception("Can add additional binaries only in deb build")
if args.with_binaries != "" and image_type == "deb":
logging.info("Should place {} to output".format(args.with_binaries))
dockerfile = os.path.join(ch_root, "docker/packager", image_type, "Dockerfile")
if image_type != "freebsd" and not check_image_exists_locally(image_name) or args.force_build_image:
if not pull_image(image_name) or args.force_build_image:
@ -202,6 +217,6 @@ if __name__ == "__main__":
env_prepared = parse_env_variables(
args.build_type, args.compiler, args.sanitizer, args.package_type, image_type,
args.cache, args.distcc_hosts, args.unbundled, args.split_binary, args.clang_tidy,
args.version, args.author, args.official, args.alien_pkgs, args.with_coverage)
args.version, args.author, args.official, args.alien_pkgs, args.with_coverage, args.with_binaries)
run_docker_image_with_env(image_name, args.output_dir, env_prepared, ch_root, args.ccache_dir)
logging.info("Output placed into {}".format(args.output_dir))

View File

@ -1,4 +1,4 @@
version: '2.2'
version: '2.3'
services:
hdfs1:
image: sequenceiq/hadoop-docker:2.7.0

View File

@ -1,4 +1,4 @@
version: '2.2'
version: '2.3'
services:
kafka_zookeeper:

View File

@ -1,4 +1,4 @@
version: '2.2'
version: '2.3'
services:
minio1:

View File

@ -1,4 +1,4 @@
version: '2.2'
version: '2.3'
services:
mongo1:
image: mongo:3.6
@ -8,3 +8,4 @@ services:
MONGO_INITDB_ROOT_PASSWORD: clickhouse
ports:
- 27018:27017
command: --profile=2 --verbose

View File

@ -1,4 +1,4 @@
version: '2.2'
version: '2.3'
services:
mysql1:
image: mysql:5.7

View File

@ -1,4 +1,4 @@
version: '2.2'
version: '2.3'
networks:
default:
driver: bridge

View File

@ -1,4 +1,4 @@
version: '2.2'
version: '2.3'
services:
postgres1:
image: postgres

View File

@ -1,4 +1,4 @@
version: '2.2'
version: '2.3'
services:
redis1:
image: redis

View File

@ -1,25 +1,47 @@
version: '2.2'
version: '2.3'
services:
zoo1:
image: zookeeper:3.4.12
restart: always
environment:
ZOO_TICK_TIME: 500
ZOO_MY_ID: 1
ZOO_SERVERS: server.1=zoo1:2888:3888 server.2=zoo2:2888:3888 server.3=zoo3:2888:3888
ZOO_MY_ID: 1
JVMFLAGS: -Dzookeeper.forceSync=no
volumes:
- type: ${ZK_FS:-tmpfs}
source: ${ZK_DATA1:-}
target: /data
- type: ${ZK_FS:-tmpfs}
source: ${ZK_DATA_LOG1:-}
target: /datalog
zoo2:
image: zookeeper:3.4.12
restart: always
environment:
ZOO_TICK_TIME: 500
ZOO_MY_ID: 2
ZOO_SERVERS: server.1=zoo1:2888:3888 server.2=zoo2:2888:3888 server.3=zoo3:2888:3888
ZOO_MY_ID: 2
JVMFLAGS: -Dzookeeper.forceSync=no
volumes:
- type: ${ZK_FS:-tmpfs}
source: ${ZK_DATA2:-}
target: /data
- type: ${ZK_FS:-tmpfs}
source: ${ZK_DATA_LOG2:-}
target: /datalog
zoo3:
image: zookeeper:3.4.12
restart: always
environment:
ZOO_TICK_TIME: 500
ZOO_MY_ID: 3
ZOO_SERVERS: server.1=zoo1:2888:3888 server.2=zoo2:2888:3888 server.3=zoo3:2888:3888
ZOO_MY_ID: 3
JVMFLAGS: -Dzookeeper.forceSync=no
volumes:
- type: ${ZK_FS:-tmpfs}
source: ${ZK_DATA3:-}
target: /data
- type: ${ZK_FS:-tmpfs}
source: ${ZK_DATA_LOG3:-}
target: /datalog

View File

@ -76,6 +76,14 @@ directory.
More complex setup is possible, but inconvenient and requires some scripting.
See `manual-run.sh` for inspiration.
#### Compare two published releases
Use `compare-releases.sh`. It will download and extract static + dbg + test
packages for both releases, and then call the main comparison script
`compare.sh`, starting from `configure` stage.
```
compare-releases.sh 19.16.19.85 20.4.2.9
```
#### Statistical considerations
Generating randomization distribution for medians is tricky. Suppose we have N

View File

@ -0,0 +1,82 @@
#!/bin/bash
# Trace every command (-x), stop at the first failing command (-e),
# and let a pipeline fail when any of its stages fails.
set -ex
set -o pipefail
# Exit on INT/TERM; on exit, kill the background jobs that are still running.
trap "exit" INT TERM
trap 'kill $(jobs -pr) ||:' EXIT
# The two release versions to compare are mandatory positional arguments.
left_version=${1}
right_version=${2}
if [ "$left_version" == "" ] || [ "$right_version" == "" ]
then
>&2 echo Usage: $(basename "$0") left_version right_version
exit 1
fi
# Absolute directory of this script; repo_dir can be preset by the caller,
# otherwise it is resolved to three levels above the script directory.
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
repo_dir=${repo_dir:-$(readlink -f "$script_dir/../../..")}
# Download the static, debug-symbol and test .deb packages of one release into
# the given directory, then unpack from them the `clickhouse` binary, its debug
# info and the performance tests.
# Arguments:
#   $1 - release version, as used in the package file names
#   $2 - existing directory to download and unpack into
# Note: changes the current directory to $2.
function download_package() # (version, path)
{
    # Keep the working variables local so they don't leak into the global scope.
    local version="$1"
    local path="$2"
    local repo_url="https://repo.clickhouse.tech/deb/stable/main"
    local package
    local x

    cd "$path"

    # -nc does not re-download files that are already present.
    # A failed download is not fatal at this point.
    for package in \
        "clickhouse-common-static-dbg_${version}_amd64.deb" \
        "clickhouse-common-static_${version}_amd64.deb" \
        "clickhouse-test_${version}_all.deb"
    do
        wget -nv -nd -nc "$repo_url/$package" ||:
    done

    # Unpack every downloaded package into a scratch directory.
    mkdir tmp ||:
    for x in *.deb; do dpkg-deb -x "$x" tmp ; done

    # Pick out the binary, its debug info and the performance tests.
    mv tmp/usr/bin/clickhouse ./clickhouse
    mkdir .debug
    mv tmp/usr/lib/debug/usr/bin/clickhouse .debug/clickhouse
    mv tmp/usr/share/clickhouse-test/performance .

    # Tool-specific names are symlinks to the single binary.
    ln -s clickhouse clickhouse-local
    ln -s clickhouse clickhouse-client
    ln -s clickhouse clickhouse-server

    rm -rf tmp
}
# Recreate the left, right and db0 directories and fill left/right with the
# packages of the two requested releases.
function download
{
# Start from clean directories; errors (e.g. nothing to remove) are ignored.
rm -r left right db0 ||:
mkdir left right db0 ||:
# Run download.sh and both package downloads as background jobs, then wait for all of them.
"$script_dir/download.sh" ||: &
download_package "$left_version" left &
download_package "$right_version" right &
wait
# Remove the unpacking scratch directories, if any are left.
rm -rf {right,left}/tmp
}
# Copy the configs from the script directory and the server configs from the
# repository into both the left and the right directory.
function configure
{
# Configs
cp -av "$script_dir/config" right
cp -av "$script_dir/config" left
# Server config* and user* files from the repository go into each config directory.
cp -av "$repo_dir"/programs/server/config* right/config
cp -av "$repo_dir"/programs/server/user* right/config
cp -av "$repo_dir"/programs/server/config* left/config
cp -av "$repo_dir"/programs/server/user* left/config
}
# Record the build options of both binaries and run the main comparison script.
function run
{
# Dump system.build_options of each binary, with trailing spaces removed and lines folded at 80 columns.
left/clickhouse-local --query "select * from system.build_options format PrettySpace" | sed 's/ *$//' | fold -w 80 -s > left-commit.txt
right/clickhouse-local --query "select * from system.build_options format PrettySpace" | sed 's/ *$//' | fold -w 80 -s > right-commit.txt
# Run compare.sh with stage=configure, the right directory first in PATH and the
# right performance tests as CHPC_TEST_PATH; stdout and stderr are sent through tee into compare.log.
PATH=right:"$PATH" \
CHPC_TEST_PATH=right/performance \
stage=configure \
"$script_dir/compare.sh" &> >(tee compare.log)
}
# Main sequence: fetch both releases, set up configs, run the comparison.
download
configure
run

# Remove the archive of a previous run, if there is one. A plain `rm` fails when
# the file does not exist, and under `set -e` that would abort the script right
# before the results are archived.
rm -f output.7z
7z a output.7z ./*.{log,tsv,html,txt,rep,svg} {right,left}/{performance,db/preprocessed_configs}

View File

@ -133,7 +133,7 @@ function run_tests
fi
# Delete old report files.
for x in {test-times,skipped-tests,wall-clock-times,report-thresholds,client-times}.tsv
for x in {test-times,wall-clock-times}.tsv
do
rm -v "$x" ||:
touch "$x"
@ -155,9 +155,6 @@ function run_tests
TIMEFORMAT=$(printf "$test_name\t%%3R\t%%3U\t%%3S\n")
# the grep is to filter out set -x output and keep only time output
{ time "$script_dir/perf.py" --host localhost localhost --port 9001 9002 -- "$test" > "$test_name-raw.tsv" 2> "$test_name-err.log" ; } 2>&1 >/dev/null | grep -v ^+ >> "wall-clock-times.tsv" || continue
# The test completed with zero status, so we treat stderr as warnings
mv "$test_name-err.log" "$test_name-warn.log"
done
unset TIMEFORMAT
@ -217,69 +214,137 @@ function get_profiles
clickhouse-client --port 9002 --query "select 1"
}
# For each of the left/right addresses and *-log TSV dumps, write a "<file>.columns"
# file containing a comma-separated list of `"name" type` pairs. The names come from
# the first line of the TSV and the types from the second line.
function build_log_column_definitions
{
# FIXME This loop builds column definitions from TSVWithNamesAndTypes in an
# absolutely atrocious way. This should be done by the file() function itself.
for x in {right,left}-{addresses,{query,query-thread,trace,metric}-log}.tsv
do
# First sed pipeline: line 1 split on tabs, each name wrapped in double quotes.
# Second sed: line 2 split on tabs. paste joins them pairwise with a space;
# tr turns the newlines into commas and the last sed drops the trailing comma.
paste -d' ' \
<(sed -n '1{s/\t/\n/g;p;q}' "$x" | sed 's/\(^.*$\)/"\1"/') \
<(sed -n '2{s/\t/\n/g;p;q}' "$x" ) \
| tr '\n' ', ' | sed 's/,$//' > "$x.columns"
done
}
# Build and analyze randomization distribution for all queries.
function analyze_queries
{
rm -v analyze-commands.txt analyze-errors.log all-queries.tsv unstable-queries.tsv ./*-report.tsv raw-queries.tsv client-times.tsv report-thresholds.tsv ||:
rm -v analyze-commands.txt analyze-errors.log all-queries.tsv unstable-queries.tsv ./*-report.tsv raw-queries.tsv ||:
rm -rf analyze ||:
mkdir analyze analyze/tmp ||:
build_log_column_definitions
# Split the raw test output into files suitable for analysis.
IFS=$'\n'
for test_file in $(find . -maxdepth 1 -name "*-raw.tsv" -print)
do
test_name=$(basename "$test_file" "-raw.tsv")
sed -n "s/^query\t//p" < "$test_file" > "$test_name-queries.tsv"
sed -n "s/^client-time/$test_name/p" < "$test_file" >> "client-times.tsv"
sed -n "s/^report-threshold/$test_name/p" < "$test_file" >> "report-thresholds.tsv"
sed -n "s/^skipped/$test_name/p" < "$test_file" >> "skipped-tests.tsv"
sed -n "s/^query\t/$test_name\t/p" < "$test_file" >> "analyze/query-runs.tsv"
sed -n "s/^client-time/$test_name/p" < "$test_file" >> "analyze/client-times.tsv"
sed -n "s/^report-threshold/$test_name/p" < "$test_file" >> "analyze/report-thresholds.tsv"
sed -n "s/^skipped/$test_name/p" < "$test_file" >> "analyze/skipped-tests.tsv"
sed -n "s/^display-name/$test_name/p" < "$test_file" >> "analyze/query-display-names.tsv"
done
unset IFS
# for each query run, prepare array of metrics from query log
clickhouse-local --query "
create view query_runs as select * from file('analyze/query-runs.tsv', TSV,
'test text, query_index int, query_id text, version UInt8, time float');
create view left_query_log as select *
from file('left-query-log.tsv', TSVWithNamesAndTypes,
'$(cat "left-query-log.tsv.columns")');
create view right_query_log as select *
from file('right-query-log.tsv', TSVWithNamesAndTypes,
'$(cat "right-query-log.tsv.columns")');
create table query_metrics engine File(TSV, -- do not add header -- will parse with grep
'analyze/query-run-metrics.tsv')
as select
test, query_index, 0 run, version,
[
-- server-reported time
query_duration_ms / toFloat64(1000)
, toFloat64(memory_usage)
-- client-reported time
, query_runs.time
] metrics
from (
select query_duration_ms, memory_usage, query_id, 0 version from left_query_log
union all
select query_duration_ms, memory_usage, query_id, 1 version from right_query_log
) query_logs
right join query_runs
using (query_id, version)
order by test, query_index
;
"
# This is a lateral join in bash... please forgive me.
# We don't have arrayPermute(), so I have to make random permutations with
# We don't have arrayPermute(), so I have to make random permutations with
# `order by rand`, and it becomes really slow if I do it for more than one
# query. We also don't have lateral joins. So I just put all runs of each
# query into a separate file, and then compute randomization distribution
# for each file. I do this in parallel using GNU parallel.
query_index=1
IFS=$'\n'
for test_file in $(find . -maxdepth 1 -name "*-queries.tsv" -print)
for prefix in $(cut -f1,2 "analyze/query-run-metrics.tsv" | sort | uniq)
do
test_name=$(basename "$test_file" "-queries.tsv")
query_index=1
for query in $(cut -d' ' -f1 "$test_file" | sort | uniq)
do
query_prefix="$test_name.q$query_index"
query_index=$((query_index + 1))
grep -F "$query " "$test_file" > "$query_prefix.tmp"
printf "%s\0\n" \
"clickhouse-local \
--file \"$query_prefix.tmp\" \
--structure 'query text, run int, version UInt32, time float' \
--query \"$(cat "$script_dir/eqmed.sql")\" \
>> \"$test_name-report.tsv\"" \
2>> analyze-errors.log \
>> analyze-commands.txt
done
file="analyze/tmp/$(echo "$prefix" | sed 's/\t/_/g').tsv"
grep "^$prefix " "analyze/query-run-metrics.tsv" > "$file" &
printf "%s\0\n" \
"clickhouse-local \
--file \"$file\" \
--structure 'test text, query text, run int, version UInt8, metrics Array(float)' \
--query \"$(cat "$script_dir/eqmed.sql")\" \
>> \"analyze/query-reports.tsv\"" \
2>> analyze/errors.log \
>> analyze/commands.txt
done
wait
unset IFS
parallel --null < analyze-commands.txt
parallel --joblog analyze/parallel-log.txt --null < analyze/commands.txt 2>> analyze/errors.log
}
# Analyze results
function report
{
rm -r report ||:
mkdir report ||:
mkdir report report/tmp ||:
rm ./*.{rep,svg} test-times.tsv test-dump.tsv unstable.tsv unstable-query-ids.tsv unstable-query-metrics.tsv changed-perf.tsv unstable-tests.tsv unstable-queries.tsv bad-tests.tsv slow-on-client.tsv all-queries.tsv ||:
cat analyze-errors.log >> report/errors.log ||:
build_log_column_definitions
cat analyze/errors.log >> report/errors.log ||:
cat profile-errors.log >> report/errors.log ||:
clickhouse-local --query "
create view query_display_names as select * from
file('analyze/query-display-names.tsv', TSV,
'test text, query_index int, query_display_name text')
;
create table query_metric_stats engine File(TSVWithNamesAndTypes,
'report/query-metric-stats.tsv') as
select metric_name, left, right, diff, stat_threshold, test, query_index,
query_display_name
from file ('analyze/query-reports.tsv', TSV, 'left Array(float),
right Array(float), diff Array(float), stat_threshold Array(float),
test text, query_index int') reports
left array join ['server_time', 'memory', 'client_time'] as metric_name,
left, right, diff, stat_threshold
left join query_display_names
on reports.test = query_display_names.test
and reports.query_index = query_display_names.query_index
;
-- Main statistics for queries -- query time as reported in query log.
create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv')
as select
-- FIXME Comparison mode doesn't make sense for queries that complete
@ -296,53 +361,65 @@ create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv')
left, right, diff, stat_threshold,
if(report_threshold > 0, report_threshold, 0.10) as report_threshold,
reports.test,
query
from
(
select *,
replaceAll(_file, '-report.tsv', '') test
from file('*-report.tsv', TSV, 'left float, right float, diff float, stat_threshold float, query text')
) reports
left join file('report-thresholds.tsv', TSV, 'test text, report_threshold float') thresholds
using test
;
test, query_index, query_display_name
from query_metric_stats
left join file('analyze/report-thresholds.tsv', TSV,
'test text, report_threshold float') thresholds
on query_metric_stats.test = thresholds.test
where metric_name = 'server_time'
order by test, query_index, metric_name
;
-- keep the table in old format so that we can analyze new and old data together
create table queries_old_format engine File(TSVWithNamesAndTypes, 'queries.rep')
as select short, changed_fail, unstable_fail, left, right, diff, stat_threshold, test, query
as select short, changed_fail, unstable_fail, left, right, diff,
stat_threshold, test, query_display_name query
from queries
;
-- save all test runs as JSON for the new comparison page
create table all_query_funs_json engine File(JSON, 'report/all-query-runs.json') as
select test, query, versions_runs[1] runs_left, versions_runs[2] runs_right
create table all_query_runs_json engine File(JSON, 'report/all-query-runs.json') as
select test, query_index, query_display_name query,
left, right, diff, stat_threshold, report_threshold,
versions_runs[1] runs_left, versions_runs[2] runs_right
from (
select
test, query,
test, query_index,
groupArrayInsertAt(runs, version) versions_runs
from (
select
replaceAll(_file, '-queries.tsv', '') test,
query, version,
groupArray(time) runs
from file('*-queries.tsv', TSV, 'query text, run int, version UInt32, time float')
group by test, query, version
test, query_index, version,
groupArray(metrics[1]) runs
from file('analyze/query-run-metrics.tsv', TSV,
'test text, query_index int, run int, version UInt8, metrics Array(float)')
group by test, query_index, version
)
group by test, query
)
group by test, query_index
) runs
left join query_display_names
on runs.test = query_display_names.test
and runs.query_index = query_display_names.query_index
left join file('analyze/report-thresholds.tsv',
TSV, 'test text, report_threshold float') thresholds
on runs.test = thresholds.test
left join query_metric_stats
on runs.test = query_metric_stats.test
and runs.query_index = query_metric_stats.query_index
where
query_metric_stats.metric_name = 'server_time'
;
create table changed_perf_tsv engine File(TSV, 'report/changed-perf.tsv') as
select left, right, diff, stat_threshold, changed_fail, test, query from queries where changed_show
order by abs(diff) desc;
select left, right, diff, stat_threshold, changed_fail, test, query_display_name
from queries where changed_show order by abs(diff) desc;
create table unstable_queries_tsv engine File(TSV, 'report/unstable-queries.tsv') as
select left, right, diff, stat_threshold, unstable_fail, test, query from queries where unstable_show
order by stat_threshold desc;
select left, right, diff, stat_threshold, unstable_fail, test, query_display_name
from queries where unstable_show order by stat_threshold desc;
create table queries_for_flamegraph engine File(TSVWithNamesAndTypes, 'report/queries-for-flamegraph.tsv') as
select query, test from queries where unstable_show or changed_show
create table queries_for_flamegraph engine File(TSVWithNamesAndTypes,
'report/queries-for-flamegraph.tsv') as
select test, query_index from queries where unstable_show or changed_show
;
create table unstable_tests_tsv engine File(TSV, 'report/bad-tests.tsv') as
@ -350,23 +427,23 @@ create table unstable_tests_tsv engine File(TSV, 'report/bad-tests.tsv') as
group by test having s > 0 order by s desc;
create table query_time engine Memory as select *
from file('client-times.tsv', TSV, 'test text, query text, client float, server float');
from file('analyze/client-times.tsv', TSV,
'test text, query_index int, client float, server float');
create table wall_clock engine Memory as select *
from file('wall-clock-times.tsv', TSV, 'test text, real float, user float, system float');
create table slow_on_client_tsv engine File(TSV, 'report/slow-on-client.tsv') as
select client, server, floor(client/server, 3) p, query
from query_time where p > 1.02 order by p desc;
select client, server, floor(client/server, 3) p, query_display_name
from query_time left join query_display_names using (test, query_index)
where p > 1.02 order by p desc;
create table test_time engine Memory as
select test, sum(client) total_client_time,
maxIf(client, not short) query_max,
minIf(client, not short) query_min,
count(*) queries,
sum(short) short_queries
from query_time full join queries
using test, query
count(*) queries, sum(short) short_queries
from query_time full join queries using (test, query_index)
group by test;
create table test_times_tsv engine File(TSV, 'report/test-times.tsv') as
@ -378,144 +455,207 @@ create table test_times_tsv engine File(TSV, 'report/test-times.tsv') as
floor(real / queries, 3) avg_real_per_query,
floor(query_min, 3)
from test_time
-- wall clock times are also measured for skipped tests, so don't
-- do full join
left join wall_clock using test
-- wall clock times are also measured for skipped tests, so don't
-- do full join
left join wall_clock using test
order by avg_real_per_query desc;
-- report for all queries page, only main metric
create table all_tests_tsv engine File(TSV, 'report/all-queries.tsv') as
select changed_fail, unstable_fail,
left, right, diff,
floor(left > right ? left / right : right / left, 3),
stat_threshold, test, query
from queries order by test, query;
stat_threshold, test, query_display_name
from queries order by test, query_display_name;
-- new report for all queries with all metrics (no page yet)
create table all_query_metrics_tsv engine File(TSV, 'report/all-query-metrics.tsv') as
select metric_name, left, right, diff,
floor(left > right ? left / right : right / left, 3),
stat_threshold, test, query_index, query_display_name
from query_metric_stats
order by test, query_index;
" 2> >(tee -a report/errors.log 1>&2)
for x in {right,left}-{addresses,{query,query-thread,trace,metric}-log}.tsv
do
# FIXME This loop builds column definitions from TSVWithNamesAndTypes in an
# absolutely atrocious way. This should be done by the file() function itself.
paste -d' ' \
<(sed -n '1{s/\t/\n/g;p;q}' "$x" | sed 's/\(^.*$\)/"\1"/') \
<(sed -n '2{s/\t/\n/g;p;q}' "$x" ) \
| tr '\n' ', ' | sed 's/,$//' > "$x.columns"
done
# Prepare source data for metrics and flamegraphs for unstable queries.
for version in {right,left}
do
clickhouse-local --query "
rm -rf data
clickhouse-local --query "
create view queries_for_flamegraph as
select * from file('report/queries-for-flamegraph.tsv', TSVWithNamesAndTypes,
'query text, test text');
'test text, query_index int');
create view query_runs as
with 0 as left, 1 as right
select * from file('analyze/query-runs.tsv', TSV,
'test text, query_index int, query_id text, version UInt8, time float')
where version = $version
;
create view query_display_names as select * from
file('analyze/query-display-names.tsv', TSV,
'test text, query_index int, query_display_name text')
;
create table unstable_query_runs engine File(TSVWithNamesAndTypes,
'unstable-query-runs.$version.rep') as
select test, query_index, query_display_name, query_id
from query_runs
join queries_for_flamegraph on
query_runs.test = queries_for_flamegraph.test
and query_runs.query_index = queries_for_flamegraph.query_index
left join query_display_names on
query_runs.test = query_display_names.test
and query_runs.query_index = query_display_names.query_index
;
create view query_log as select *
from file('$version-query-log.tsv', TSVWithNamesAndTypes,
'$(cat "$version-query-log.tsv.columns")');
create table unstable_run_metrics engine File(TSVWithNamesAndTypes,
'unstable-run-metrics.$version.rep') as
select
test, query_index, query_id,
ProfileEvents.Values value, ProfileEvents.Names metric
from query_log array join ProfileEvents
join unstable_query_runs using (query_id)
;
create table unstable_run_metrics_2 engine File(TSVWithNamesAndTypes,
'unstable-run-metrics-2.$version.rep') as
select
test, query_index, query_id,
v, n
from (
select
test, query_index, query_id,
['memory_usage', 'read_bytes', 'written_bytes', 'query_duration_ms'] n,
[memory_usage, read_bytes, written_bytes, query_duration_ms] v
from query_log
join unstable_query_runs using (query_id)
)
array join v, n;
create view trace_log as select *
from file('$version-trace-log.tsv', TSVWithNamesAndTypes,
'$(cat "$version-trace-log.tsv.columns")');
create view addresses_src as select *
create view addresses_src as select addr,
-- Some functions change name between builds, e.g. '__clone' or 'clone' or
-- even '__GI__clone@@GLIBC_2.32'. This breaks differential flame graphs, so
-- filter them out here.
[name, 'clone.S (filtered by script)', 'pthread_cond_timedwait (filtered by script)']
-- this line is a subscript operator of the above array
[1 + multiSearchFirstIndex(name, ['clone.S', 'pthread_cond_timedwait'])] name
from file('$version-addresses.tsv', TSVWithNamesAndTypes,
'$(cat "$version-addresses.tsv.columns")');
create table addresses_join_$version engine Join(any, left, address) as
select addr address, name from addresses_src;
create table unstable_query_runs engine File(TSVWithNamesAndTypes,
'unstable-query-runs.$version.rep') as
select query, query_id from query_log
where query in (select query from queries_for_flamegraph)
and query_id not like 'prewarm %'
;
create table unstable_query_log engine File(Vertical,
'unstable-query-log.$version.rep') as
select * from query_log
where query_id in (select query_id from unstable_query_runs);
create table unstable_run_metrics engine File(TSVWithNamesAndTypes,
'unstable-run-metrics.$version.rep') as
select ProfileEvents.Values value, ProfileEvents.Names metric, query_id, query
from query_log array join ProfileEvents
where query_id in (select query_id from unstable_query_runs)
;
create table unstable_run_metrics_2 engine File(TSVWithNamesAndTypes,
'unstable-run-metrics-2.$version.rep') as
select v, n, query_id, query
from
(select
['memory_usage', 'read_bytes', 'written_bytes', 'query_duration_ms'] n,
[memory_usage, read_bytes, written_bytes, query_duration_ms] v,
query,
query_id
from query_log
where query_id in (select query_id from unstable_query_runs))
array join n, v;
create table unstable_run_traces engine File(TSVWithNamesAndTypes,
'unstable-run-traces.$version.rep') as
select
test, query_index, query_id,
count() value,
joinGet(addresses_join_$version, 'name', arrayJoin(trace)) metric,
unstable_query_runs.query_id,
any(unstable_query_runs.query) query
from unstable_query_runs
join trace_log on trace_log.query_id = unstable_query_runs.query_id
group by unstable_query_runs.query_id, metric
joinGet(addresses_join_$version, 'name', arrayJoin(trace))
|| '(' || toString(trace_type) || ')' metric
from trace_log
join unstable_query_runs using query_id
group by test, query_index, query_id, metric
order by count() desc
;
create table metric_devation engine File(TSVWithNamesAndTypes,
'metric-deviation.$version.rep') as
select query, floor((q[3] - q[1])/q[2], 3) d,
quantilesExact(0, 0.5, 1)(value) q, metric
from (select * from unstable_run_metrics
union all select * from unstable_run_traces
union all select * from unstable_run_metrics_2) mm
join queries_for_flamegraph using query
group by query, metric
having d > 0.5
order by query desc, d desc
'report/metric-deviation.$version.tsv') as
-- first goes the key used to split the file with grep
select test, query_index, query_display_name,
d, q, metric
from (
select
test, query_index,
floor((q[3] - q[1])/q[2], 3) d,
quantilesExact(0, 0.5, 1)(value) q, metric
from (select * from unstable_run_metrics
union all select * from unstable_run_traces
union all select * from unstable_run_metrics_2) mm
group by test, query_index, metric
having d > 0.5
) metrics
left join query_display_names using (test, query_index)
order by test, query_index, d desc
;
create table stacks engine File(TSV, 'stacks.$version.rep') as
create table stacks engine File(TSV, 'report/stacks.$version.tsv') as
select
query,
-- first goes the key used to split the file with grep
test, query_index, trace_type, any(query_display_name),
-- next go the stacks in flamegraph format: 'func1;...;funcN count'
arrayStringConcat(
arrayMap(x -> joinGet(addresses_join_$version, 'name', x),
arrayMap(
addr -> joinGet(addresses_join_$version, 'name', addr),
arrayReverse(trace)
),
';'
) readable_trace,
count()
count() c
from trace_log
join unstable_query_runs using query_id
group by query, trace
group by test, query_index, trace_type, trace
order by test, query_index, trace_type, trace
;
" 2> >(tee -a report/errors.log 1>&2) # do not run in parallel because they use the same data dir for StorageJoins which leads to weird errors.
done
wait
# Create per-query flamegraphs
IFS=$'\n'
for version in {right,left}
do
for query in $(cut -d' ' -f1 "stacks.$version.rep" | sort | uniq)
for query in $(cut -d' ' -f1-4 "report/stacks.$version.tsv" | sort | uniq)
do
query_file=$(echo "$query" | cut -c-120 | sed 's/[/]/_/g')
query_file=$(echo "$query" | cut -c-120 | sed 's/[/ ]/_/g')
echo "$query_file" >> report/query-files.txt
# Build separate .svg flamegraph for each query.
grep -F "$query " "stacks.$version.rep" \
| cut -d' ' -f 2- \
# -F is somewhat unsafe because it might match somewhere other than the start
# of the string, but this is unlikely and escaping the query for grep is a pain.
grep -F "$query " "report/stacks.$version.tsv" \
| cut -f 5- \
| sed 's/\t/ /g' \
| tee "$query_file.stacks.$version.rep" \
| ~/fg/flamegraph.pl > "$query_file.$version.svg" &
| tee "report/tmp/$query_file.stacks.$version.tsv" \
| ~/fg/flamegraph.pl --hash > "$query_file.$version.svg" &
done
done
wait
unset IFS
# Copy metric stats into separate files as well.
grep -F "$query " "metric-deviation.$version.rep" \
| cut -f2- > "$query_file.$version.metrics.rep" &
# Create differential flamegraphs.
IFS=$'\n'
for query_file in $(cat report/query-files.txt)
do
~/fg/difffolded.pl "report/tmp/$query_file.stacks.left.tsv" \
"report/tmp/$query_file.stacks.right.tsv" \
| tee "report/tmp/$query_file.stacks.diff.tsv" \
| ~/fg/flamegraph.pl > "$query_file.diff.svg" &
done
unset IFS
wait
# Create per-query files with metrics. Note that the key is different from flamegraphs.
IFS=$'\n'
for version in {right,left}
do
for query in $(cut -d' ' -f1-3 "report/metric-deviation.$version.tsv" | sort | uniq)
do
query_file=$(echo "$query" | cut -c-120 | sed 's/[/ ]/_/g')
# Ditto the above comment about -F.
grep -F "$query " "report/metric-deviation.$version.tsv" \
| cut -f4- > "$query_file.$version.metrics.rep" &
done
done
wait

View File

@ -46,7 +46,13 @@ function download
done
mkdir ~/fg ||:
cd ~/fg && wget -nv -nd -c "https://raw.githubusercontent.com/brendangregg/FlameGraph/master/flamegraph.pl" && chmod +x ~/fg/flamegraph.pl &
(
cd ~/fg
wget -nv -nd -c "https://raw.githubusercontent.com/brendangregg/FlameGraph/master/flamegraph.pl"
wget -nv -nd -c "https://raw.githubusercontent.com/brendangregg/FlameGraph/master/difffolded.pl"
chmod +x ~/fg/difffolded.pl
chmod +x ~/fg/flamegraph.pl
) &
wait
}

View File

@ -81,8 +81,13 @@ if [ "$REF_PR" == "" ]; then echo Reference PR is not specified ; exit 1 ; fi
fi
) | tee right-commit.txt
# Prepare the list of changed tests for use by compare.sh
git -C ch diff --name-only "$SHA_TO_TEST" "$(git -C ch merge-base "$SHA_TO_TEST"~ master)" -- tests/performance | tee changed-tests.txt
if [ "$PR_TO_TEST" != "0" ]
then
# Prepare the list of tests changed in the PR for use by compare.sh. Compare to
# merge base, because master might be far in the future and have unrelated test
# changes.
git -C ch diff --name-only "$SHA_TO_TEST" "$(git -C ch merge-base "$SHA_TO_TEST" master)" -- tests/performance | tee changed-tests.txt
fi
# Set python output encoding so that we can print queries with Russian letters.
export PYTHONIOENCODING=utf-8
@ -119,5 +124,5 @@ done
dmesg -T > dmesg.log
7z a /output/output.7z ./*.{log,tsv,html,txt,rep,svg} {right,left}/{performance,db/preprocessed_configs,scripts} ./report
7z a /output/output.7z ./*.{log,tsv,html,txt,rep,svg} {right,left}/{performance,db/preprocessed_configs,scripts} report analyze
cp compare.log /output

View File

@ -1,32 +1,37 @@
-- input is table(query text, run UInt32, version int, time float)
-- input is table(test text, query text, run UInt32, version int, metrics Array(float))
select
floor(original_medians_array.time_by_version[1], 4) l,
floor(original_medians_array.time_by_version[2], 4) r,
floor((r - l) / l, 3) diff_percent,
floor(threshold / l, 3) threshold_percent,
query
arrayMap(x -> floor(x, 4), original_medians_array.medians_by_version[1] as l) l_rounded,
arrayMap(x -> floor(x, 4), original_medians_array.medians_by_version[2] as r) r_rounded,
arrayMap(x, y -> floor((y - x) / x, 3), l, r) diff_percent,
arrayMap(x, y -> floor(x / y, 3), threshold, l) threshold_percent,
test, query
from
(
-- quantiles of randomization distributions
select quantileExact(0.999)(abs(time_by_label[1] - time_by_label[2]) as d) threshold
select quantileExactForEach(0.999)(
arrayMap(x, y -> abs(x - y), metrics_by_label[1], metrics_by_label[2]) as d
) threshold
---- uncomment to see what the distribution is really like
--, uniqExact(d) u
--, uniqExact(d.1) u
--, arraySort(x->x.1,
-- arrayZip(
-- (sumMap([d], [1]) as f).1,
-- (sumMap([d.1], [1]) as f).1,
-- f.2)) full_histogram
from
(
select virtual_run, groupArrayInsertAt(median_time, random_label) time_by_label -- make array 'random label' -> 'median time'
-- make array 'random label' -> '[median metric]'
select virtual_run, groupArrayInsertAt(median_metrics, random_label) metrics_by_label
from (
select medianExact(time) median_time, virtual_run, random_label -- get median times, grouping by random label
-- get [median metric] arrays among virtual runs, grouping by random label
select medianExactForEach(metrics) median_metrics, virtual_run, random_label
from (
select *, toUInt32(rowNumberInAllBlocks() % 2) random_label -- randomly relabel measurements
-- randomly relabel measurements
select *, toUInt32(rowNumberInAllBlocks() % 2) random_label
from (
select time, number virtual_run
select metrics, number virtual_run
from
-- strip the query away before the join -- it might be several kB long;
(select time, run, version from table) no_query,
(select metrics, run, version from table) no_query,
-- duplicate input measurements into many virtual runs
numbers(1, 100000) nn
-- for each virtual run, randomly reorder measurements
@ -40,19 +45,19 @@ from
-- this select aggregates by virtual_run
) rd,
(
select groupArrayInsertAt(median_time, version) time_by_version
select groupArrayInsertAt(median_metrics, version) medians_by_version
from
(
select medianExact(time) median_time, version
select medianExactForEach(metrics) median_metrics, version
from table
group by version
) original_medians
) original_medians_array,
(
select any(query) query from table
select any(test) test, any(query) query from table
) any_query,
(
select throwIf(uniq(query) != 1) from table
select throwIf(uniq((test, query)) != 1) from table
) check_single_query -- this subselect checks that there is only one query in the input table;
-- written this way so that it is not optimized away (#10523)
;

View File

@ -11,6 +11,9 @@ import string
import time
import traceback
def tsv_escape(s):
return s.replace('\\', '\\\\').replace('\t', '\\t').replace('\n', '\\n').replace('\r','')
stage_start_seconds = time.perf_counter()
def report_stage_end(stage_name):
@ -29,6 +32,8 @@ parser.add_argument('--runs', type=int, default=int(os.environ.get('CHPC_RUNS',
parser.add_argument('--no-long', type=bool, default=True, help='Skip the tests tagged as long.')
args = parser.parse_args()
test_name = os.path.splitext(os.path.basename(args.file[0].name))[0]
tree = et.parse(args.file[0])
root = tree.getroot()
@ -110,8 +115,9 @@ for t in tables:
try:
res = c.execute("select 1 from {} limit 1".format(t))
except:
print('skipped\t' + traceback.format_exception_only(*sys.exc_info()[:2])[-1])
traceback.print_exc()
exception_message = traceback.format_exception_only(*sys.exc_info()[:2])[-1]
skipped_message = ' '.join(exception_message.split('\n')[:2])
print(f'skipped\t{tsv_escape(skipped_message)}')
sys.exit(0)
report_stage_end('preconditions')
@ -133,27 +139,30 @@ for c in connections:
report_stage_end('fill')
# Run test queries
def tsv_escape(s):
return s.replace('\\', '\\\\').replace('\t', '\\t').replace('\n', '\\n').replace('\r','')
test_query_templates = [q.text for q in root.findall('query')]
test_queries = substitute_parameters(test_query_templates)
report_stage_end('substitute2')
for i, q in enumerate(test_queries):
for query_index, q in enumerate(test_queries):
query_prefix = f'{test_name}.query{query_index}'
# We have some crazy long queries (about 100kB), so trim them to a sane
# length.
# length. This means we can't use query text as an identifier and have to
# use the test name + the test-wide query index.
query_display_name = q
if len(query_display_name) > 1000:
query_display_name = f'{query_display_name[:1000]}...({i})'
query_display_name = f'{query_display_name[:1000]}...({query_index})'
print(f'display-name\t{query_index}\t{tsv_escape(query_display_name)}')
# Prewarm: run once on both servers. Helps to bring the data into memory,
# precompile the queries, etc.
try:
for conn_index, c in enumerate(connections):
res = c.execute(q, query_id = f'prewarm {0} {query_display_name}')
print(f'prewarm\t{tsv_escape(query_display_name)}\t{conn_index}\t{c.last_query.elapsed}')
prewarm_id = f'{query_prefix}.prewarm0'
res = c.execute(q, query_id = prewarm_id)
print(f'prewarm\t{query_index}\t{prewarm_id}\t{conn_index}\t{c.last_query.elapsed}')
except KeyboardInterrupt:
raise
except:
@ -172,13 +181,14 @@ for i, q in enumerate(test_queries):
start_seconds = time.perf_counter()
server_seconds = 0
for run in range(0, args.runs):
run_id = f'{query_prefix}.run{run}'
for conn_index, c in enumerate(connections):
res = c.execute(q)
print(f'query\t{tsv_escape(query_display_name)}\t{run}\t{conn_index}\t{c.last_query.elapsed}')
res = c.execute(q, query_id = run_id)
print(f'query\t{query_index}\t{run_id}\t{conn_index}\t{c.last_query.elapsed}')
server_seconds += c.last_query.elapsed
client_seconds = time.perf_counter() - start_seconds
print(f'client-time\t{tsv_escape(query_display_name)}\t{client_seconds}\t{server_seconds}')
print(f'client-time\t{query_index}\t{client_seconds}\t{server_seconds}')
report_stage_end('benchmark')

View File

@ -25,6 +25,9 @@ very_unstable_queries = 0
# max seconds to run one query by itself, not counting preparation
allowed_single_run_time = 2
color_bad='#ffb0c0'
color_good='#b0d050'
header_template = """
<!DOCTYPE html>
<html>
@ -179,6 +182,16 @@ if args.report == 'main':
print_tested_commits()
run_error_rows = tsvRows('run-errors.tsv')
error_tests += len(run_error_rows)
printSimpleTable('Run errors', ['Test', 'Error'], run_error_rows)
slow_on_client_rows = tsvRows('report/slow-on-client.tsv')
error_tests += len(slow_on_client_rows)
printSimpleTable('Slow on client',
['Client time, s', 'Server time, s', 'Ratio', 'Query'],
slow_on_client_rows)
def print_changes():
rows = tsvRows('report/changed-perf.tsv')
if not rows:
@ -188,8 +201,8 @@ if args.report == 'main':
print(tableStart('Changes in performance'))
columns = [
'Old, s.', # 0
'New, s.', # 1
'Old, s', # 0
'New, s', # 1
'Relative difference (new&nbsp;&minus;&nbsp;old) / old', # 2
'p&nbsp;<&nbsp;0.001 threshold', # 3
# Failed # 4
@ -205,10 +218,10 @@ if args.report == 'main':
if int(row[4]):
if float(row[2]) < 0.:
faster_queries += 1
attrs[2] = 'style="background: #00ff00"'
attrs[2] = f'style="background: {color_good}"'
else:
slower_queries += 1
attrs[2] = 'style="background: #ff0000"'
attrs[2] = f'style="background: {color_bad}"'
else:
attrs[2] = ''
@ -218,12 +231,6 @@ if args.report == 'main':
print_changes()
slow_on_client_rows = tsvRows('report/slow-on-client.tsv')
error_tests += len(slow_on_client_rows)
printSimpleTable('Slow on client',
['Client time, s.', 'Server time, s.', 'Ratio', 'Query'],
slow_on_client_rows)
def print_unstable_queries():
global unstable_queries
global very_unstable_queries
@ -252,7 +259,7 @@ if args.report == 'main':
for r in unstable_rows:
if int(r[4]):
very_unstable_queries += 1
attrs[3] = 'style="background: #ffb0a0"'
attrs[3] = f'style="background: {color_bad}"'
else:
attrs[3] = ''
@ -262,11 +269,7 @@ if args.report == 'main':
print_unstable_queries()
run_error_rows = tsvRows('run-errors.tsv')
error_tests += len(run_error_rows)
printSimpleTable('Run errors', ['Test', 'Error'], run_error_rows)
skipped_tests_rows = tsvRows('skipped-tests.tsv')
skipped_tests_rows = tsvRows('analyze/skipped-tests.tsv')
printSimpleTable('Skipped tests', ['Test', 'Reason'], skipped_tests_rows)
printSimpleTable('Tests with most unstable queries',
@ -281,13 +284,13 @@ if args.report == 'main':
columns = [
'Test', #0
'Wall clock time, s.', #1
'Total client time, s.', #2
'Wall clock time, s', #1
'Total client time, s', #2
'Total queries', #3
'Ignored short queries', #4
'Longest query<br>(sum for all runs), s.', #5
'Avg wall clock time<br>(sum for all runs), s.', #6
'Shortest query<br>(sum for all runs), s.', #7
'Longest query<br>(sum for all runs), s', #5
'Avg wall clock time<br>(sum for all runs), s', #6
'Shortest query<br>(sum for all runs), s', #7
]
print(tableStart('Test times'))
@ -300,13 +303,13 @@ if args.report == 'main':
if float(r[6]) > 1.5 * total_runs:
# FIXME should be 15s max -- investigate parallel_insert
slow_average_tests += 1
attrs[6] = 'style="background: #ffb0a0"'
attrs[6] = f'style="background: {color_bad}"'
else:
attrs[6] = ''
if float(r[5]) > allowed_single_run_time * total_runs:
slow_average_tests += 1
attrs[5] = 'style="background: #ffb0a0"'
attrs[5] = f'style="background: {color_bad}"'
else:
attrs[5] = ''
@ -320,9 +323,9 @@ if args.report == 'main':
print("""
<p class="links">
<a href="output.7z">Test output</a>
<a href="all-queries.html">All queries</a>
<a href="compare.log">Log</a>
<a href="output.7z">Test output</a>
</p>
</body>
</html>
@ -382,8 +385,8 @@ elif args.report == 'all-queries':
columns = [
# Changed #0
# Unstable #1
'Old, s.', #2
'New, s.', #3
'Old, s', #2
'New, s', #3
'Relative difference (new&nbsp;&minus;&nbsp;old) / old', #4
'Times speedup / slowdown', #5
'p&nbsp;<&nbsp;0.001 threshold', #6
@ -399,21 +402,21 @@ elif args.report == 'all-queries':
attrs[1] = None
for r in rows:
if int(r[1]):
attrs[6] = 'style="background: #ffb0a0"'
attrs[6] = f'style="background: {color_bad}"'
else:
attrs[6] = ''
if int(r[0]):
if float(r[4]) > 0.:
attrs[4] = 'style="background: #ffb0a0"'
attrs[4] = f'style="background: {color_bad}"'
else:
attrs[4] = 'style="background: #adbdff"'
attrs[4] = f'style="background: {color_good}"'
else:
attrs[4] = ''
if (float(r[2]) + float(r[3])) / 2 > allowed_single_run_time:
attrs[2] = 'style="background: #ffb0a0"'
attrs[3] = 'style="background: #ffb0a0"'
attrs[2] = f'style="background: {color_bad}"'
attrs[3] = f'style="background: {color_bad}"'
else:
attrs[2] = ''
attrs[3] = ''
@ -428,9 +431,9 @@ elif args.report == 'all-queries':
print("""
<p class="links">
<a href="output.7z">Test output</a>
<a href="report.html">Main report</a>
<a href="compare.log">Log</a>
<a href="output.7z">Test output</a>
</p>
</body>
</html>

View File

@ -7,7 +7,7 @@ Building of ClickHouse is supported on Linux, FreeBSD and Mac OS X.
# If You Use Windows {#if-you-use-windows}
If you use Windows, you need to create a virtual machine with Ubuntu. To start working with a virtual machine please install VirtualBox. You can download Ubuntu from the website: https://www.ubuntu.com/\#download. Please create a virtual machine from the downloaded image (you should reserve at least 4GB of RAM for it). To run a command-line terminal in Ubuntu, please locate a program containing the word “terminal” in its name (gnome-terminal, konsole etc.) or just press Ctrl+Alt+T.
If you use Windows, you need to create a virtual machine with Ubuntu. To start working with a virtual machine please install VirtualBox. You can download Ubuntu from the website: https://www.ubuntu.com/#download. Please create a virtual machine from the downloaded image (you should reserve at least 4GB of RAM for it). To run a command-line terminal in Ubuntu, please locate a program containing the word “terminal” in its name (gnome-terminal, konsole etc.) or just press Ctrl+Alt+T.
# If You Use a 32-bit System {#if-you-use-a-32-bit-system}

View File

@ -1,6 +1,8 @@
---
toc_folder_title: Engines
toc_priority: 25
toc_title: hidden
toc_hidden: true
---
{## [Original article](https://clickhouse.tech/docs/en/engines/) ##}

View File

@ -72,7 +72,7 @@ Examples:
kafka_format = 'JSONEachRow',
kafka_num_consumers = 4;
CREATE TABLE queue2 (
CREATE TABLE queue3 (
timestamp UInt64,
level String,
message String

View File

@ -1,6 +1,9 @@
---
toc_folder_title: F.A.Q.
toc_priority: 76
toc_title: hidden
toc_hidden: true
---
{## [Original article](https://clickhouse.tech/docs/en/faq) ##}

View File

@ -1543,20 +1543,32 @@ It represents an unbiased estimate of the variance of a random variable if passe
Returns `Float64`. When `n <= 1`, returns `+∞`.
!!! note "Note"
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varSampStable` function. It works slower but provides a lower computational error.
## varPop(x) {#varpopx}
Calculates the amount `Σ((x - x̅)^2) / n`, where `n` is the sample size and `x̅` is the average value of `x`.
In other words, dispersion for a set of values. Returns `Float64`.
!!! note "Note"
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varPopStable` function. It works slower but provides a lower computational error.
## stddevSamp(x) {#stddevsampx}
The result is equal to the square root of `varSamp(x)`.
!!! note "Note"
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `stddevSampStable` function. It works slower but provides a lower computational error.
## stddevPop(x) {#stddevpopx}
The result is equal to the square root of `varPop(x)`.
!!! note "Note"
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `stddevPopStable` function. It works slower but provides a lower computational error.
## topK(N)(x) {#topknx}
Returns an array of the approximately most frequent values in the specified column. The resulting array is sorted in descending order of approximate frequency of values (not by the values themselves).
@ -1641,14 +1653,23 @@ Calculates the value of `Σ((x - x̅)(y - y̅)) / (n - 1)`.
Returns Float64. When `n <= 1`, returns +∞.
!!! note "Note"
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `covarSampStable` function. It works slower but provides a lower computational error.
## covarPop(x, y) {#covarpopx-y}
Calculates the value of `Σ((x - x̅)(y - y̅)) / n`.
!!! note "Note"
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `covarPopStable` function. It works slower but provides a lower computational error.
## corr(x, y) {#corrx-y}
Calculates the Pearson correlation coefficient: `Σ((x - x̅)(y - y̅)) / sqrt(Σ((x - x̅)^2) * Σ((y - y̅)^2))`.
!!! note "Note"
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `corrStable` function. It works slower but provides a lower computational error.
## categoricalInformationValue {#categoricalinformationvalue}
Calculates the value of `(P(tag = 1) - P(tag = 0))(log(P(tag = 1)) - log(P(tag = 0)))` for each category.

View File

@ -11,7 +11,7 @@ All the functions accept zero arguments or one argument.
If an argument is passed, it can be any type, and its value is not used for anything.
The only purpose of this argument is to prevent common subexpression elimination, so that two different instances of the same function return different columns with different random numbers.
## rand {#rand}
## rand, rand32 {#rand}
Returns a pseudo-random UInt32 number, evenly distributed among all UInt32-type numbers.
Uses a linear congruential generator.

View File

@ -28,9 +28,10 @@ There may be any number of space symbols between syntactical constructions (incl
## Comments {#comments}
ClickHouse supports either SQL-style and C-style comments.
SQL-style comments start with `--` and continue to the end of the line, a space after `--` can be omitted.
C-style are from `/*` to `*/`and can be multiline, spaces are not required either.
ClickHouse supports both SQL-style and C-style comments:
- SQL-style comments start with `--` and continue to the end of the line; the space after `--` can be omitted.
- C-style comments span from `/*` to `*/` and can be multiline; spaces are not required either.
## Keywords {#syntax-keywords}

View File

@ -291,30 +291,30 @@ INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE minmax GRANULARITY
INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE set(100) GRANULARITY 4
```
#### Поддержка для функций {#podderzhka-dlia-funktsii}
#### Поддержка для функций {#functions-support}
Условия в секции `WHERE` содержат вызовы функций, оперирующих со столбцами. Если столбец - часть индекса, ClickHouse пытается использовать индекс при выполнении функции. Для разных видов индексов, ClickHouse поддерживает различные наборы функций, которые могут использоваться индексами.
Индекс `set` используется со всеми функциями. Наборы функций для остальных индексов представлены в таблице ниже.
| Function (operator) / Index | primary key | minmax | ngrambf\_v1 | tokenbf\_v1 | bloom\_filter |
|----------------------------------------------------------------------------------------------------------------|-------------|--------|-------------|-------------|---------------|
| [equals (=, ==)](../../../engines/table-engines/mergetree-family/mergetree.md#function-equals) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [notEquals(!=, \<\>)](../../../engines/table-engines/mergetree-family/mergetree.md#function-notequals) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [like](../../../engines/table-engines/mergetree-family/mergetree.md#function-like) | ✔ | ✔ | ✔ | ✗ | ✗ |
| [notLike](../../../engines/table-engines/mergetree-family/mergetree.md#function-notlike) | ✔ | ✔ | ✔ | ✔ | ✗ |
| [startsWith](../../../engines/table-engines/mergetree-family/mergetree.md#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ |
| [endsWith](../../../engines/table-engines/mergetree-family/mergetree.md#endswith) | ✗ | ✗ | ✔ | ✔ | ✗ |
| [multiSearchAny](../../../engines/table-engines/mergetree-family/mergetree.md#function-multisearchany) | ✗ | ✗ | ✔ | ✔ | ✗ |
| [in](../../../engines/table-engines/mergetree-family/mergetree.md#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [notIn](../../../engines/table-engines/mergetree-family/mergetree.md#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [less (\<)](../../../engines/table-engines/mergetree-family/mergetree.md#function-less) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [greater (\>)](../../../engines/table-engines/mergetree-family/mergetree.md#function-greater) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [lessOrEquals (\<=)](../../../engines/table-engines/mergetree-family/mergetree.md#function-lessorequals) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [greaterOrEquals (\>=)](../../../engines/table-engines/mergetree-family/mergetree.md#function-greaterorequals) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [empty](../../../engines/table-engines/mergetree-family/mergetree.md#function-empty) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [notEmpty](../../../engines/table-engines/mergetree-family/mergetree.md#function-notempty) | ✔ | ✔ | ✗ | ✗ | ✗ |
| hasToken | ✗ | ✗ | ✗ | ✔ | ✗ |
| Функция (оператор) / Индекс | primary key | minmax | ngrambf\_v1 | tokenbf\_v1 | bloom\_filter |
|------------------------------------------------------------------------------------------------------------|-------------|--------|-------------|-------------|---------------|
| [equals (=, ==)](../../../sql-reference/functions/comparison-functions.md#function-equals) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [notEquals(!=, \<\>)](../../../sql-reference/functions/comparison-functions.md#function-notequals) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [like](../../../sql-reference/functions/string-search-functions.md#function-like) | ✔ | ✔ | ✔ | ✗ | ✗ |
| [notLike](../../../sql-reference/functions/string-search-functions.md#function-notlike) | ✔ | ✔ | ✔ | ✗ | ✗ |
| [startsWith](../../../sql-reference/functions/string-functions.md#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ |
| [endsWith](../../../sql-reference/functions/string-functions.md#endswith) | ✗ | ✗ | ✔ | ✔ | ✗ |
| [multiSearchAny](../../../sql-reference/functions/string-search-functions.md#function-multisearchany) | ✗ | ✗ | ✔ | ✗ | ✗ |
| [in](../../../sql-reference/functions/in-functions.md#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [notIn](../../../sql-reference/functions/in-functions.md#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ |
| [less (\<)](../../../sql-reference/functions/comparison-functions.md#function-less) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [greater (\>)](../../../sql-reference/functions/comparison-functions.md#function-greater) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [lessOrEquals (\<=)](../../../sql-reference/functions/comparison-functions.md#function-lessorequals) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [greaterOrEquals (\>=)](../../../sql-reference/functions/comparison-functions.md#function-greaterorequals) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [empty](../../../sql-reference/functions/array-functions.md#function-empty) | ✔ | ✔ | ✗ | ✗ | ✗ |
| [notEmpty](../../../sql-reference/functions/array-functions.md#function-notempty) | ✔ | ✔ | ✗ | ✗ | ✗ |
| hasToken | ✗ | ✗ | ✗ | ✔ | ✗ |
Функции с постоянным аргументом, который меньше, чем размер ngram, не могут использовать индекс `ngrambf_v1` для оптимизации запроса.

View File

@ -113,7 +113,7 @@ ClickHouse может слить куски данных таким образо
Если название вложенной таблицы заканчивается на `Map` и она содержит не менее двух столбцов, удовлетворяющих критериям:
- первый столбец - числовой `(*Int*, Date, DateTime)`, назовем его условно `key`,
- первый столбец - числовой `(*Int*, Date, DateTime)` или строковый `(String, FixedString)`, назовем его условно `key`,
- остальные столбцы - арифметические `(*Int*, Float32/64)`, условно `(values...)`,
то вложенная таблица воспринимается как отображение `key => (values...)` и при слиянии её строк выполняется слияние элементов двух множеств по `key` со сложением соответствующих `(values...)`.

View File

@ -45,6 +45,7 @@
- [ClickHouse.Net](https://github.com/ilyabreev/ClickHouse.Net)
- Elixir
- [clickhousex](https://github.com/appodeal/clickhousex/)
- [pillar](https://github.com/sofakingworld/pillar)
- Nim
- [nim-clickhouse](https://github.com/leonardoce/nim-clickhouse)

View File

@ -1533,20 +1533,33 @@ SELECT medianDeterministic(val, 1) FROM t
Возвращает `Float64`. В случае, когда `n <= 1`, возвращается `+∞`.
!!! note "Примечание"
Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `varSampStable`. Она работает медленнее, но обеспечивает меньшую вычислительную ошибку.
## varPop(x) {#varpopx}
Вычисляет величину `Σ((x - x̅)^2) / n`, где `n` - размер выборки, `x̅` - среднее значение `x`.
То есть, дисперсию для множества значений. Возвращает `Float64`.
!!! note "Примечание"
Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `varPopStable`. Она работает медленнее, но обеспечивает меньшую вычислительную ошибку.
## stddevSamp(x) {#stddevsampx}
Результат равен квадратному корню из `varSamp(x)`.
!!! note "Примечание"
Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `stddevSampStable`. Она работает медленнее, но обеспечивает меньшую вычислительную ошибку.
## stddevPop(x) {#stddevpopx}
Результат равен квадратному корню из `varPop(x)`.
!!! note "Примечание"
Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `stddevPopStable`. Она работает медленнее, но обеспечивает меньшую вычислительную ошибку.
## topK(N)(column) {#topkncolumn}
Возвращает массив наиболее часто встречающихся значений в указанном столбце. Результирующий массив упорядочен по убыванию частоты значения (не по самим значениям).
@ -1626,14 +1639,24 @@ SELECT topKWeighted(10)(number, number) FROM numbers(1000)
Возвращает Float64. В случае, когда `n <= 1`, возвращается +∞.
!!! note "Примечание"
Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `covarSampStable`. Она работает медленнее, но обеспечивает меньшую вычислительную ошибку.
## covarPop(x, y) {#covarpopx-y}
Вычисляет величину `Σ((x - x̅)(y - y̅)) / n`.
!!! note "Примечание"
Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `covarPopStable`. Она работает медленнее, но обеспечивает меньшую вычислительную ошибку.
## corr(x, y) {#corrx-y}
Вычисляет коэффициент корреляции Пирсона: `Σ((x - x̅)(y - y̅)) / sqrt(Σ((x - x̅)^2) * Σ((y - y̅)^2))`.
!!! note "Примечание"
Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `corrStable`. Она работает медленнее, но обеспечивает меньшую вычислительную ошибку.
## simpleLinearRegression {#simplelinearregression}
Выполняет простую (одномерную) линейную регрессию.

View File

@ -174,7 +174,7 @@ Upd. Всё ещё ждём удаление старого кода, котор
### 2.3. Перенос столбцового ser/de из DataType в Column {#perenos-stolbtsovogo-serde-iz-datatype-v-column}
В очереди.
В очереди. Антон Попов.
### 2.4. Перевод LowCardinality из DataType в Column. Добавление ColumnSparse {#perevod-lowcardinality-iz-datatype-v-column-dobavlenie-columnsparse}
@ -977,10 +977,10 @@ Q2.
[Виталий Баранов](https://github.com/vitlibar) и Денис Глазачев, Altinity. Требует 12.1.
### 12.6. Информация о пользователях и квотах в системной таблице {#informatsiia-o-polzovateliakh-i-kvotakh-v-sistemnoi-tablitse}
### 12.6. + Информация о пользователях и квотах в системной таблице {#informatsiia-o-polzovateliakh-i-kvotakh-v-sistemnoi-tablitse}
[Виталий Баранов](https://github.com/vitlibar). Требует 12.1.
Есть pull request. Q2.
Есть pull request. Q2. Готово.
## 13. Разделение ресурсов, multi-tenancy {#razdelenie-resursov-multi-tenancy}

View File

@ -58,17 +58,6 @@ def build_for_lang(lang, args):
'custom_dir': os.path.join(os.path.dirname(__file__), '..', args.theme_dir),
'language': lang,
'direction': 'rtl' if lang == 'fa' else 'ltr',
# TODO: cleanup
'feature': {
'tabs': False
},
'palette': {
'primary': 'white',
'accent': 'white'
},
'font': False,
'logo': 'images/logo.svg',
'favicon': 'assets/images/favicon.ico',
'static_templates': ['404.html'],
'extra': {
'now': int(time.mktime(datetime.datetime.now().timetuple())) # TODO better way to avoid caching

View File

@ -18,10 +18,10 @@ Markdown==3.2.1
MarkupSafe==1.1.1
mkdocs==1.1.2
mkdocs-htmlproofer-plugin==0.0.3
mkdocs-macros-plugin==0.4.7
mkdocs-macros-plugin==0.4.9
nltk==3.5
nose==1.3.7
protobuf==3.12.0
protobuf==3.12.1
numpy==1.18.4
Pygments==2.5.2
pymdown-extensions==7.1
@ -30,7 +30,7 @@ PyYAML==5.3.1
repackage==0.7.3
requests==2.23.0
singledispatch==3.4.0.3
six==1.14.0
six==1.15.0
soupsieve==2.0.1
termcolor==1.1.0
tornado==5.1.1

View File

@ -46,7 +46,7 @@ sudo yum-config-manager --add-repo https://repo.yandex.ru/clickhouse/rpm/stable/
sudo yum install clickhouse-server clickhouse-client
```
您也可以从此处手动下载和安装软件包https://repo.yandex.ru/clickhouse/rpm/stable/x86\_64。
您也可以从此处手动下载和安装软件包：https://repo.yandex.ru/clickhouse/rpm/stable/x86_64 。
### 来自Docker {#from-docker-image}

View File

@ -1,3 +1,7 @@
# When the build is configured with USE_CLANG_TIDY, point CMAKE_CXX_CLANG_TIDY
# at the clang-tidy executable. CMake uses this variable as the default for the
# CXX_CLANG_TIDY property of targets created after this point, so clang-tidy
# runs alongside their C++ compilation.
if (USE_CLANG_TIDY)
set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}")
endif ()
# 'clickhouse' binary is a multi purpose tool,
# that contain multiple execution modes (client, server, etc.)
# each of them is built and linked as a separate library, defined below.
@ -201,3 +205,9 @@ endif ()
if (TARGET clickhouse-server AND TARGET copy-headers)
add_dependencies(clickhouse-server copy-headers)
endif ()
# Aggregate target for the listed test targets; it is only defined when both
# ENABLE_TESTS and USE_GTEST are set.
if (ENABLE_TESTS AND USE_GTEST)
# Names of the test targets that clickhouse-tests depends on.
set (CLICKHOUSE_ALL_TESTS_TARGETS local_date_time_comparison unit_tests_libcommon unit_tests_dbms hashing_write_buffer hashing_read_buffer in_join_subqueries_preprocessor expression_analyzer)
# ALL adds clickhouse-tests to the default build target.
add_custom_target (clickhouse-tests ALL DEPENDS ${CLICKHOUSE_ALL_TESTS_TARGETS})
# Make clickhouse-bundle depend on clickhouse-tests, so building the bundle builds the tests too.
add_dependencies(clickhouse-bundle clickhouse-tests)
endif()

View File

@ -289,7 +289,7 @@ private:
connection_entries.emplace_back(std::make_shared<Entry>(
connection->get(ConnectionTimeouts::getTCPTimeoutsWithoutFailover(settings))));
pool.scheduleOrThrowOnError(std::bind(&Benchmark::thread, this, connection_entries));
pool.scheduleOrThrowOnError([this, connection_entries]() mutable { thread(connection_entries); });
}
}
catch (...)
@ -424,7 +424,7 @@ private:
std::cerr << percent << "%\t\t";
for (const auto & info : infos)
{
std::cerr << info->sampler.quantileNearest(percent / 100.0) << " sec." << "\t";
std::cerr << info->sampler.quantileNearest(percent / 100.0) << " sec.\t";
}
std::cerr << "\n";
};
@ -459,7 +459,7 @@ private:
auto print_percentile = [&json_out](Stats & info, auto percent, bool with_comma = true)
{
json_out << "\"" << percent << "\"" << ": " << info.sampler.quantileNearest(percent / 100.0) << (with_comma ? ",\n" : "\n");
json_out << "\"" << percent << "\": " << info.sampler.quantileNearest(percent / 100.0) << (with_comma ? ",\n" : "\n");
};
json_out << "{\n";
@ -469,7 +469,7 @@ private:
const auto & info = infos[i];
json_out << double_quote << connections[i]->getDescription() << ": {\n";
json_out << double_quote << "statistics" << ": {\n";
json_out << double_quote << "statistics: {\n";
print_key_value("QPS", info->queries / info->work_time);
print_key_value("RPS", info->read_rows / info->work_time);
@ -479,7 +479,7 @@ private:
print_key_value("num_queries", info->queries.load(), false);
json_out << "},\n";
json_out << double_quote << "query_time_percentiles" << ": {\n";
json_out << double_quote << "query_time_percentiles: {\n";
for (int percent = 0; percent <= 90; percent += 10)
print_percentile(*info, percent);

View File

@ -485,7 +485,7 @@ private:
history_file = config().getString("history_file");
else
{
auto history_file_from_env = getenv("CLICKHOUSE_HISTORY_FILE");
auto * history_file_from_env = getenv("CLICKHOUSE_HISTORY_FILE");
if (history_file_from_env)
history_file = history_file_from_env;
else if (!home_path.empty())
@ -1480,7 +1480,7 @@ private:
"\033[1m↗\033[0m",
};
auto indicator = indicators[increment % 8];
const char * indicator = indicators[increment % 8];
if (!send_logs && written_progress_chars)
message << '\r';

View File

@ -51,7 +51,7 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati
{
std::string prompt{"Password for user (" + user + "): "};
char buf[1000] = {};
if (auto result = readpassphrase(prompt.c_str(), buf, sizeof(buf), 0))
if (auto * result = readpassphrase(prompt.c_str(), buf, sizeof(buf), 0))
password = result;
}

View File

@ -5,6 +5,7 @@
#include <Client/Connection.h>
#include <IO/ConnectionTimeouts.h>
#include <common/LineReader.h>
#include <thread>
namespace DB

View File

@ -26,7 +26,7 @@ void ClusterCopier::init()
if (response.error != Coordination::ZOK)
return;
UInt64 version = ++task_description_version;
LOG_DEBUG(log, "Task description should be updated, local version " << version);
LOG_DEBUG(log, "Task description should be updated, local version {}", version);
};
task_description_path = task_zookeeper_path + "/description";
@ -47,7 +47,7 @@ void ClusterCopier::init()
task_table.initShards(task_cluster->random_engine);
}
LOG_DEBUG(log, "Will process " << task_cluster->table_tasks.size() << " table tasks");
LOG_DEBUG(log, "Will process {} table tasks", task_cluster->table_tasks.size());
/// Do not initialize tables, will make deferred initialization in process()
@ -85,7 +85,7 @@ void ClusterCopier::discoverShardPartitions(const ConnectionTimeouts & timeouts,
{
TaskTable & task_table = task_shard->task_table;
LOG_INFO(log, "Discover partitions of shard " << task_shard->getDescription());
LOG_INFO(log, "Discover partitions of shard {}", task_shard->getDescription());
auto get_partitions = [&] () { return getShardPartitions(timeouts, *task_shard); };
auto existing_partitions_names = retry(get_partitions, 60);
@ -132,8 +132,7 @@ void ClusterCopier::discoverShardPartitions(const ConnectionTimeouts & timeouts,
{
if (!task_table.enabled_partitions_set.count(partition_name))
{
LOG_DEBUG(log, "Partition " << partition_name << " will not be processed, since it is not in "
<< "enabled_partitions of " << task_table.table_id);
LOG_DEBUG(log, "Partition {} will not be processed, since it is not in enabled_partitions of {}", partition_name, task_table.table_id);
}
}
}
@ -165,11 +164,10 @@ void ClusterCopier::discoverShardPartitions(const ConnectionTimeouts & timeouts,
for (const String & missing_partition : missing_partitions)
ss << " " << missing_partition;
LOG_WARNING(log, "There are no " << missing_partitions.size() << " partitions from enabled_partitions in shard "
<< task_shard->getDescription() << " :" << ss.str());
LOG_WARNING(log, "There are no {} partitions from enabled_partitions in shard {} :{}", missing_partitions.size(), task_shard->getDescription(), ss.str());
}
LOG_DEBUG(log, "Will copy " << task_shard->partition_tasks.size() << " partitions from shard " << task_shard->getDescription());
LOG_DEBUG(log, "Will copy {} partitions from shard {}", task_shard->partition_tasks.size(), task_shard->getDescription());
}
void ClusterCopier::discoverTablePartitions(const ConnectionTimeouts & timeouts, TaskTable & task_table, UInt64 num_threads)
@ -181,7 +179,7 @@ void ClusterCopier::discoverTablePartitions(const ConnectionTimeouts & timeouts,
for (const TaskShardPtr & task_shard : task_table.all_shards)
thread_pool.scheduleOrThrowOnError([this, timeouts, task_shard]() { discoverShardPartitions(timeouts, task_shard); });
LOG_DEBUG(log, "Waiting for " << thread_pool.active() << " setup jobs");
LOG_DEBUG(log, "Waiting for {} setup jobs", thread_pool.active());
thread_pool.wait();
}
}
@ -205,7 +203,8 @@ void ClusterCopier::uploadTaskDescription(const std::string & task_path, const s
if (code && force)
zookeeper->createOrUpdate(local_task_description_path, task_config_str, zkutil::CreateMode::Persistent);
LOG_DEBUG(log, "Task description " << ((code && !force) ? "not " : "") << "uploaded to " << local_task_description_path << " with result " << code << " ("<< zookeeper->error2string(code) << ")");
LOG_DEBUG(log, "Task description {} uploaded to {} with result {} ({})",
((code && !force) ? "not " : ""), local_task_description_path, code, zookeeper->error2string(code));
}
void ClusterCopier::reloadTaskDescription()
@ -221,7 +220,7 @@ void ClusterCopier::reloadTaskDescription()
if (code)
throw Exception("Can't get description node " + task_description_path, ErrorCodes::BAD_ARGUMENTS);
LOG_DEBUG(log, "Loading description, zxid=" << task_description_current_stat.czxid);
LOG_DEBUG(log, "Loading description, zxid={}", task_description_current_stat.czxid);
auto config = getConfigurationFromXMLString(task_config_str);
/// Setup settings
@ -251,9 +250,7 @@ void ClusterCopier::process(const ConnectionTimeouts & timeouts)
{
for (TaskTable & task_table : task_cluster->table_tasks)
{
LOG_INFO(log, "Process table task " << task_table.table_id << " with "
<< task_table.all_shards.size() << " shards, "
<< task_table.local_shards.size() << " of them are local ones");
LOG_INFO(log, "Process table task {} with {} shards, {} of them are local ones", task_table.table_id, task_table.all_shards.size(), task_table.local_shards.size());
if (task_table.all_shards.empty())
continue;
@ -357,8 +354,7 @@ zkutil::EphemeralNodeHolder::Ptr ClusterCopier::createTaskWorkerNodeAndWaitIfNee
if (static_cast<UInt64>(stat.numChildren) >= task_cluster->max_workers)
{
LOG_DEBUG(log, "Too many workers (" << stat.numChildren << ", maximum " << task_cluster->max_workers << ")"
<< ". Postpone processing " << description);
LOG_DEBUG(log, "Too many workers ({}, maximum {}). Postpone processing {}", stat.numChildren, task_cluster->max_workers, description);
if (unprioritized)
current_sleep_time = std::min(max_sleep_time, current_sleep_time + default_sleep_time);
@ -419,7 +415,7 @@ bool ClusterCopier::checkAllPiecesInPartitionAreDone(const TaskTable & task_tabl
{
bool piece_is_done = checkPartitionPieceIsDone(task_table, partition_name, piece_number, shards_with_partition);
if (!piece_is_done)
LOG_DEBUG(log, "Partition " << partition_name << " piece " + toString(piece_number) + " is not already done.");
LOG_DEBUG(log, "Partition {} piece {} is not already done.", partition_name, piece_number);
answer &= piece_is_done;
}
@ -435,14 +431,13 @@ bool ClusterCopier::checkAllPiecesInPartitionAreDone(const TaskTable & task_tabl
bool ClusterCopier::checkPartitionPieceIsDone(const TaskTable & task_table, const String & partition_name,
size_t piece_number, const TasksShard & shards_with_partition)
{
LOG_DEBUG(log, "Check that all shards processed partition " << partition_name
<< " piece " + toString(piece_number) + " successfully");
LOG_DEBUG(log, "Check that all shards processed partition {} piece {} successfully", partition_name, piece_number);
auto zookeeper = context.getZooKeeper();
/// Collect all shards that contain partition piece number piece_number.
Strings piece_status_paths;
for (auto & shard : shards_with_partition)
for (const auto & shard : shards_with_partition)
{
ShardPartition & task_shard_partition = shard->partition_tasks.find(partition_name)->second;
ShardPartitionPiece & shard_partition_piece = task_shard_partition.pieces[piece_number];
@ -465,8 +460,7 @@ bool ClusterCopier::checkPartitionPieceIsDone(const TaskTable & task_table, cons
TaskStateWithOwner status = TaskStateWithOwner::fromString(res.data);
if (status.state != TaskState::Finished)
{
LOG_INFO(log, "The task " << res.data << " is being rewritten by "
<< status.owner << ". Partition piece will be rechecked");
LOG_INFO(log, "The task {} is being rewritten by {}. Partition piece will be rechecked", res.data, status.owner);
return false;
}
@ -484,7 +478,7 @@ bool ClusterCopier::checkPartitionPieceIsDone(const TaskTable & task_table, cons
if (!is_clean)
{
LOG_INFO(log, "Partition " << partition_name << " become dirty");
LOG_INFO(log, "Partition {} become dirty", partition_name);
return false;
}
@ -501,8 +495,7 @@ bool ClusterCopier::checkPartitionPieceIsDone(const TaskTable & task_table, cons
}
catch (const Coordination::Exception & e)
{
LOG_INFO(log, "A ZooKeeper error occurred while checking partition " << partition_name << " piece number "
<< toString(piece_number) << ". Will recheck the partition. Error: " << e.displayText());
LOG_INFO(log, "A ZooKeeper error occurred while checking partition {} piece number {}. Will recheck the partition. Error: {}", partition_name, toString(piece_number), e.displayText());
return false;
}
@ -511,12 +504,12 @@ bool ClusterCopier::checkPartitionPieceIsDone(const TaskTable & task_table, cons
{
if (zxid1[shard_num] != zxid2[shard_num])
{
LOG_INFO(log, "The task " << piece_status_paths[shard_num] << " is being modified now. Partition piece will be rechecked");
LOG_INFO(log, "The task {} is being modified now. Partition piece will be rechecked", piece_status_paths[shard_num]);
return false;
}
}
LOG_INFO(log, "Partition " << partition_name << " piece number " << toString(piece_number) << " is copied successfully");
LOG_INFO(log, "Partition {} piece number {} is copied successfully", partition_name, toString(piece_number));
return true;
}
@ -530,7 +523,7 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t
inject_fault = value < move_fault_probability;
}
LOG_DEBUG(log, "Try to move " << partition_name << " to destionation table");
LOG_DEBUG(log, "Try to move {} to destionation table", partition_name);
auto zookeeper = context.getZooKeeper();
@ -548,7 +541,7 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t
{
if (e.code == Coordination::ZNODEEXISTS)
{
LOG_DEBUG(log, "Someone is already moving pieces " << current_partition_attach_is_active);
LOG_DEBUG(log, "Someone is already moving pieces {}", current_partition_attach_is_active);
return TaskStatus::Active;
}
@ -565,16 +558,13 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t
TaskStateWithOwner status = TaskStateWithOwner::fromString(status_data);
if (status.state == TaskState::Finished)
{
LOG_DEBUG(log, "All pieces for partition from this task " << current_partition_attach_is_active
<< " has been successfully moved to destination table by " << status.owner);
LOG_DEBUG(log, "All pieces for partition from this task {} has been successfully moved to destination table by {}", current_partition_attach_is_active, status.owner);
return TaskStatus::Finished;
}
/// Task is abandoned, because previously we created ephemeral node, possibly in other copier's process.
/// Initialize DROP PARTITION
LOG_DEBUG(log, "Moving piece for partition " << current_partition_attach_is_active
<< " has not been successfully finished by " << status.owner
<< ". Will try to move by myself.");
LOG_DEBUG(log, "Moving piece for partition {} has not been successfully finished by {}. Will try to move by myself.", current_partition_attach_is_active, status.owner);
/// Remove is_done marker.
zookeeper->remove(current_partition_attach_is_done);
@ -591,9 +581,7 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t
/// Move partition to original destination table.
for (size_t current_piece_number = 0; current_piece_number < task_table.number_of_splits; ++current_piece_number)
{
LOG_DEBUG(log, "Trying to move partition " << partition_name
<< " piece " << toString(current_piece_number)
<< " to original table");
LOG_DEBUG(log, "Trying to move partition {} piece {} to original table", partition_name, toString(current_piece_number));
ASTPtr query_alter_ast;
String query_alter_ast_string;
@ -614,7 +602,7 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t
" ATTACH PARTITION " + partition_name +
" FROM " + getQuotedTable(helping_table);
LOG_DEBUG(log, "Executing ALTER query: " << query_alter_ast_string);
LOG_DEBUG(log, "Executing ALTER query: {}", query_alter_ast_string);
try
{
@ -626,13 +614,11 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t
PoolMode::GET_MANY,
ClusterExecutionMode::ON_EACH_NODE);
LOG_INFO(log, "Number of nodes that executed ALTER query successfully : " << toString(num_nodes));
LOG_INFO(log, "Number of nodes that executed ALTER query successfully : {}", toString(num_nodes));
}
catch (...)
{
LOG_DEBUG(log, "Error while moving partition " << partition_name
<< " piece " << toString(current_piece_number)
<< "to original table");
LOG_DEBUG(log, "Error while moving partition {} piece {} to original table", partition_name, toString(current_piece_number));
throw;
}
@ -647,7 +633,7 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t
query_deduplicate_ast_string += " OPTIMIZE TABLE " + getQuotedTable(original_table) +
" PARTITION " + partition_name + " DEDUPLICATE;";
LOG_DEBUG(log, "Executing OPTIMIZE DEDUPLICATE query: " << query_alter_ast_string);
LOG_DEBUG(log, "Executing OPTIMIZE DEDUPLICATE query: {}", query_alter_ast_string);
UInt64 num_nodes = executeQueryOnCluster(
task_table.cluster_push,
@ -656,14 +642,12 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t
&task_cluster->settings_push,
PoolMode::GET_MANY);
LOG_INFO(log, "Number of shard that executed OPTIMIZE DEDUPLICATE query successfully : "
<< toString(num_nodes));
LOG_INFO(log, "Number of shard that executed OPTIMIZE DEDUPLICATE query successfully : {}", toString(num_nodes));
}
}
catch (...)
{
LOG_DEBUG(log, "Error while executing OPTIMIZE DEDUPLICATE partition " << partition_name
<< "in the original table");
LOG_DEBUG(log, "Error while executing OPTIMIZE DEDUPLICATE partition {}in the original table", partition_name);
throw;
}
}
@ -702,7 +686,7 @@ ASTPtr ClusterCopier::removeAliasColumnsFromCreateQuery(const ASTPtr & query_ast
auto new_columns_list = std::make_shared<ASTColumns>();
new_columns_list->set(new_columns_list->columns, new_columns);
if (auto indices = query_ast->as<ASTCreateQuery>()->columns_list->indices)
if (const auto * indices = query_ast->as<ASTCreateQuery>()->columns_list->indices)
new_columns_list->set(new_columns_list->indices, indices->clone());
new_query.replace(new_query.columns_list, new_columns_list);
@ -759,8 +743,7 @@ bool ClusterCopier::tryDropPartitionPiece(
{
if (e.code == Coordination::ZNODEEXISTS)
{
LOG_DEBUG(log, "Partition " << task_partition.name << " piece "
<< toString(current_piece_number) << " is cleaning now by somebody, sleep");
LOG_DEBUG(log, "Partition {} piece {} is cleaning now by somebody, sleep", task_partition.name, toString(current_piece_number));
std::this_thread::sleep_for(default_sleep_time);
return false;
}
@ -773,8 +756,7 @@ bool ClusterCopier::tryDropPartitionPiece(
{
if (stat.numChildren != 0)
{
LOG_DEBUG(log, "Partition " << task_partition.name << " contains " << stat.numChildren
<< " active workers while trying to drop it. Going to sleep.");
LOG_DEBUG(log, "Partition {} contains {} active workers while trying to drop it. Going to sleep.", task_partition.name, stat.numChildren);
std::this_thread::sleep_for(default_sleep_time);
return false;
}
@ -794,7 +776,7 @@ bool ClusterCopier::tryDropPartitionPiece(
{
if (e.code == Coordination::ZNODEEXISTS)
{
LOG_DEBUG(log, "Partition " << task_partition.name << " is being filled now by somebody, sleep");
LOG_DEBUG(log, "Partition {} is being filled now by somebody, sleep", task_partition.name);
return false;
}
@ -832,7 +814,7 @@ bool ClusterCopier::tryDropPartitionPiece(
/// It is important, DROP PARTITION must be done synchronously
settings_push.replication_alter_partitions_sync = 2;
LOG_DEBUG(log, "Execute distributed DROP PARTITION: " << query);
LOG_DEBUG(log, "Execute distributed DROP PARTITION: {}", query);
/// We have to drop partition_piece on each replica
size_t num_shards = executeQueryOnCluster(
cluster_push, query,
@ -841,7 +823,7 @@ bool ClusterCopier::tryDropPartitionPiece(
PoolMode::GET_MANY,
ClusterExecutionMode::ON_EACH_NODE);
LOG_INFO(log, "DROP PARTITION was successfully executed on " << num_shards << " nodes of a cluster.");
LOG_INFO(log, "DROP PARTITION was successfully executed on {} nodes of a cluster.", num_shards);
/// Update the locking node
if (!my_clock.is_stale())
@ -859,13 +841,12 @@ bool ClusterCopier::tryDropPartitionPiece(
return false;
}
LOG_INFO(log, "Partition " << task_partition.name << " piece " << toString(current_piece_number)
<< " was dropped on cluster " << task_table.cluster_push_name);
LOG_INFO(log, "Partition {} piece {} was dropped on cluster {}", task_partition.name, toString(current_piece_number), task_table.cluster_push_name);
if (zookeeper->tryCreate(current_shards_path, host_id, zkutil::CreateMode::Persistent) == Coordination::ZNODEEXISTS)
zookeeper->set(current_shards_path, host_id);
}
LOG_INFO(log, "Partition " << task_partition.name << " piece " << toString(current_piece_number) << " is safe for work now.");
LOG_INFO(log, "Partition {} piece {} is safe for work now.", task_partition.name, toString(current_piece_number));
return true;
}
@ -889,7 +870,7 @@ bool ClusterCopier::tryProcessTable(const ConnectionTimeouts & timeouts, TaskTab
++cluster_partition.total_tries;
LOG_DEBUG(log, "Processing partition " << partition_name << " for the whole cluster");
LOG_DEBUG(log, "Processing partition {} for the whole cluster", partition_name);
/// Process each source shard having current partition and copy current partition
/// NOTE: shards are sorted by "distance" to current host
@ -911,7 +892,7 @@ bool ClusterCopier::tryProcessTable(const ConnectionTimeouts & timeouts, TaskTab
{
const size_t number_of_splits = task_table.number_of_splits;
shard->partition_tasks.emplace(partition_name, ShardPartition(*shard, partition_name, number_of_splits));
LOG_DEBUG(log, "Discovered partition " << partition_name << " in shard " << shard->getDescription());
LOG_DEBUG(log, "Discovered partition {} in shard {}", partition_name, shard->getDescription());
/// To save references in the future.
auto shard_partition_it = shard->partition_tasks.find(partition_name);
PartitionPieces & shard_partition_pieces = shard_partition_it->second.pieces;
@ -924,7 +905,7 @@ bool ClusterCopier::tryProcessTable(const ConnectionTimeouts & timeouts, TaskTab
}
else
{
LOG_DEBUG(log, "Found that shard " << shard->getDescription() << " does not contain current partition " << partition_name);
LOG_DEBUG(log, "Found that shard {} does not contain current partition {}", shard->getDescription(), partition_name);
continue;
}
}
@ -1030,21 +1011,20 @@ bool ClusterCopier::tryProcessTable(const ConnectionTimeouts & timeouts, TaskTab
task_table.rows_copied += cluster_partition.rows_copied;
double elapsed = cluster_partition.elapsed_time_seconds;
LOG_INFO(log, "It took " << std::fixed << std::setprecision(2) << elapsed << " seconds to copy partition " << partition_name
<< ": " << formatReadableSizeWithDecimalSuffix(cluster_partition.bytes_copied) << " uncompressed bytes"
<< ", " << formatReadableQuantity(cluster_partition.rows_copied) << " rows"
<< " and " << cluster_partition.blocks_copied << " source blocks are copied");
LOG_INFO(log, "It took {} seconds to copy partition {}: {} uncompressed bytes, {} rows and {} source blocks are copied",
elapsed, partition_name,
formatReadableSizeWithDecimalSuffix(cluster_partition.bytes_copied),
formatReadableQuantity(cluster_partition.rows_copied),
cluster_partition.blocks_copied);
if (cluster_partition.rows_copied)
{
LOG_INFO(log, "Average partition speed: "
<< formatReadableSizeWithDecimalSuffix(cluster_partition.bytes_copied / elapsed) << " per second.");
LOG_INFO(log, "Average partition speed: {} per second.", formatReadableSizeWithDecimalSuffix(cluster_partition.bytes_copied / elapsed));
}
if (task_table.rows_copied)
{
LOG_INFO(log, "Average table " << task_table.table_id << " speed: "
<< formatReadableSizeWithDecimalSuffix(task_table.bytes_copied / elapsed) << " per second.");
LOG_INFO(log, "Average table {} speed: {} per second.", task_table.table_id, formatReadableSizeWithDecimalSuffix(task_table.bytes_copied / elapsed));
}
}
}
@ -1055,8 +1035,7 @@ bool ClusterCopier::tryProcessTable(const ConnectionTimeouts & timeouts, TaskTab
if (!table_is_done)
{
LOG_INFO(log, "Table " + task_table.table_id + " is not processed yet."
<< "Copied " << finished_partitions << " of " << required_partitions << ", will retry");
/// The message is two sentences; keep a space after the period so the log does not read "yet.Copied".
LOG_INFO(log, "Table {} is not processed yet. Copied {} of {}, will retry", task_table.table_id, finished_partitions, required_partitions);
}
return table_is_done;
@ -1104,9 +1083,11 @@ TaskStatus ClusterCopier::iterateThroughAllPiecesInPartition(const ConnectionTim
{
for (UInt64 try_num = 0; try_num < max_shard_partition_tries; ++try_num)
{
LOG_INFO(log, "Attempt number " << try_num << " to process partition " << task_partition.name
<< " piece number " << piece_number << " on shard number " << task_partition.task_shard.numberInCluster()
<< " with index " << task_partition.task_shard.indexInCluster());
LOG_INFO(log, "Attempt number {} to process partition {} piece number {} on shard number {} with index {}.",
try_num, task_partition.name, piece_number,
task_partition.task_shard.numberInCluster(),
task_partition.task_shard.indexInCluster());
res = processPartitionPieceTaskImpl(timeouts, task_partition, piece_number, is_unprioritized_task);
/// Exit if success
@ -1210,7 +1191,7 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
/// Load balancing
auto worker_node_holder = createTaskWorkerNodeAndWaitIfNeed(zookeeper, current_task_piece_status_path, is_unprioritized_task);
LOG_DEBUG(log, "Processing " << current_task_piece_status_path);
LOG_DEBUG(log, "Processing {}", current_task_piece_status_path);
const String piece_status_path = partition_piece.getPartitionPieceShardsPath();
@ -1221,14 +1202,12 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
/// Do not start if partition piece is dirty, try to clean it
if (is_clean)
{
LOG_DEBUG(log, "Partition " << task_partition.name
<< " piece " + toString(current_piece_number) + " appears to be clean");
LOG_DEBUG(log, "Partition {} piece {} appears to be clean", task_partition.name, current_piece_number);
zookeeper->createAncestors(current_task_piece_status_path);
}
else
{
LOG_DEBUG(log, "Partition " << task_partition.name
<< " piece " + toString(current_piece_number) + " is dirty, try to drop it");
LOG_DEBUG(log, "Partition {} piece {} is dirty, try to drop it", task_partition.name, current_piece_number);
try
{
@ -1253,7 +1232,7 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
{
if (e.code == Coordination::ZNODEEXISTS)
{
LOG_DEBUG(log, "Someone is already processing " << current_task_piece_is_active_path);
LOG_DEBUG(log, "Someone is already processing {}", current_task_piece_is_active_path);
return TaskStatus::Active;
}
@ -1269,16 +1248,13 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
TaskStateWithOwner status = TaskStateWithOwner::fromString(status_data);
if (status.state == TaskState::Finished)
{
LOG_DEBUG(log, "Task " << current_task_piece_status_path
<< " has been successfully executed by " << status.owner);
LOG_DEBUG(log, "Task {} has been successfully executed by {}", current_task_piece_status_path, status.owner);
return TaskStatus::Finished;
}
/// Task is abandoned, because previously we created ephemeral node, possibly in other copier's process.
/// Initialize DROP PARTITION
LOG_DEBUG(log, "Task " << current_task_piece_status_path
<< " has not been successfully finished by " << status.owner
<< ". Partition will be dropped and refilled.");
LOG_DEBUG(log, "Task {} has not been successfully finished by {}. Partition will be dropped and refilled.", current_task_piece_status_path, status.owner);
create_is_dirty_node(clean_state_clock);
return TaskStatus::Error;
@ -1293,11 +1269,9 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
String state_finished = TaskStateWithOwner::getData(TaskState::Finished, host_id);
auto res = zookeeper->tryCreate(current_task_piece_status_path, state_finished, zkutil::CreateMode::Persistent);
if (res == Coordination::ZNODEEXISTS)
LOG_DEBUG(log, "Partition " << task_partition.name << " piece "
+ toString(current_piece_number) + " is absent on current replica of a shard. But other replicas have already marked it as done.");
LOG_DEBUG(log, "Partition {} piece {} is absent on current replica of a shard. But other replicas have already marked it as done.", task_partition.name, current_piece_number);
if (res == Coordination::ZOK)
LOG_DEBUG(log, "Partition " << task_partition.name << " piece "
+ toString(current_piece_number) + " is absent on current replica of a shard. Will mark it as done. Other replicas will do the same.");
LOG_DEBUG(log, "Partition {} piece {} is absent on current replica of a shard. Will mark it as done. Other replicas will do the same.", task_partition.name, current_piece_number);
return TaskStatus::Finished;
}
@ -1325,18 +1299,14 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
if (count != 0)
{
LOG_INFO(log, "Partition " << task_partition.name << " piece "
<< current_piece_number << "is not empty. In contains " << count << " rows.");
/// Separate the piece number from the following word and spell "It contains" correctly in the message.
LOG_INFO(log, "Partition {} piece {} is not empty. It contains {} rows.", task_partition.name, current_piece_number, count);
Coordination::Stat stat_shards{};
zookeeper->get(partition_piece.getPartitionPieceShardsPath(), &stat_shards);
/// NOTE: partition is still fresh if dirt discovery happens before cleaning
if (stat_shards.numChildren == 0)
{
LOG_WARNING(log, "There are no workers for partition " << task_partition.name
<< " piece " << toString(current_piece_number)
<< ", but destination table contains " << count << " rows"
<< ". Partition will be dropped and refilled.");
LOG_WARNING(log, "There are no workers for partition {} piece {}, but destination table contains {} rows. Partition will be dropped and refilled.", task_partition.name, toString(current_piece_number), count);
create_is_dirty_node(clean_state_clock);
return TaskStatus::Error;
@ -1353,14 +1323,12 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
CleanStateClock new_clean_state_clock (zookeeper, piece_is_dirty_flag_path, piece_is_dirty_cleaned_path);
if (clean_state_clock != new_clean_state_clock)
{
LOG_INFO(log, "Partition " << task_partition.name << " piece "
<< toString(current_piece_number) << " clean state changed, cowardly bailing");
LOG_INFO(log, "Partition {} piece {} clean state changed, cowardly bailing", task_partition.name, toString(current_piece_number));
return TaskStatus::Error;
}
else if (!new_clean_state_clock.is_clean())
{
LOG_INFO(log, "Partition " << task_partition.name << " piece "
<< toString(current_piece_number) << " is dirty and will be dropped and refilled");
LOG_INFO(log, "Partition {} piece {} is dirty and will be dropped and refilled", task_partition.name, toString(current_piece_number));
create_is_dirty_node(new_clean_state_clock);
return TaskStatus::Error;
}
@ -1387,12 +1355,11 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
create_query_push_ast->as<ASTCreateQuery &>().if_not_exists = true;
String query = queryToString(create_query_push_ast);
LOG_DEBUG(log, "Create destination tables. Query: " << query);
LOG_DEBUG(log, "Create destination tables. Query: {}", query);
UInt64 shards = executeQueryOnCluster(task_table.cluster_push, query,
create_query_push_ast, &task_cluster->settings_push,
PoolMode::GET_MANY);
LOG_DEBUG(log, "Destination tables " << getQuotedTable(task_table.table_push)
<< " have been created on " << shards << " shards of " << task_table.cluster_push->getShardCount());
LOG_DEBUG(log, "Destination tables {} have been created on {} shards of {}", getQuotedTable(task_table.table_push), shards, task_table.cluster_push->getShardCount());
}
/// Do the copying
@ -1407,8 +1374,7 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
// Select all fields
ASTPtr query_select_ast = get_select_query(task_shard.table_read_shard, "*", /*enable_splitting*/ true, inject_fault ? "1" : "");
LOG_DEBUG(log, "Executing SELECT query and pull from " << task_shard.getDescription()
<< " : " << queryToString(query_select_ast));
LOG_DEBUG(log, "Executing SELECT query and pull from {} : {}", task_shard.getDescription(), queryToString(query_select_ast));
ASTPtr query_insert_ast;
{
@ -1419,7 +1385,7 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
const auto & settings = context.getSettingsRef();
query_insert_ast = parseQuery(p_query, query, settings.max_query_size, settings.max_parser_depth);
LOG_DEBUG(log, "Executing INSERT query: " << query);
LOG_DEBUG(log, "Executing INSERT query: {}", query);
}
try
@ -1501,8 +1467,7 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
}
}
LOG_INFO(log, "Partition " << task_partition.name << " piece "
<< toString(current_piece_number) << " copied. But not moved to original destination table.");
LOG_INFO(log, "Partition {} piece {} copied. But not moved to original destination table.", task_partition.name, toString(current_piece_number));
/// Try create original table (if not exists) on each shard
@ -1513,12 +1478,11 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
create_query_push_ast->as<ASTCreateQuery &>().if_not_exists = true;
String query = queryToString(create_query_push_ast);
LOG_DEBUG(log, "Create destination tables. Query: " << query);
LOG_DEBUG(log, "Create destination tables. Query: {}", query);
UInt64 shards = executeQueryOnCluster(task_table.cluster_push, query,
create_query_push_ast, &task_cluster->settings_push,
PoolMode::GET_MANY);
LOG_DEBUG(log, "Destination tables " << getQuotedTable(task_table.table_push)
<< " have been created on " << shards << " shards of " << task_table.cluster_push->getShardCount());
LOG_DEBUG(log, "Destination tables {} have been created on {} shards of {}", getQuotedTable(task_table.table_push), shards, task_table.cluster_push->getShardCount());
}
catch (...)
{
@ -1531,14 +1495,12 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl(
CleanStateClock new_clean_state_clock (zookeeper, piece_is_dirty_flag_path, piece_is_dirty_cleaned_path);
if (clean_state_clock != new_clean_state_clock)
{
LOG_INFO(log, "Partition " << task_partition.name << " piece "
<< toString(current_piece_number) << " clean state changed, cowardly bailing");
LOG_INFO(log, "Partition {} piece {} clean state changed, cowardly bailing", task_partition.name, toString(current_piece_number));
return TaskStatus::Error;
}
else if (!new_clean_state_clock.is_clean())
{
LOG_INFO(log, "Partition " << task_partition.name << " piece "
<< toString(current_piece_number) << " became dirty and will be dropped and refilled");
LOG_INFO(log, "Partition {} piece {} became dirty and will be dropped and refilled", task_partition.name, toString(current_piece_number));
create_is_dirty_node(new_clean_state_clock);
return TaskStatus::Error;
}
@ -1582,7 +1544,7 @@ void ClusterCopier::dropHelpingTables(const TaskTable & task_table)
const ClusterPtr & cluster_push = task_table.cluster_push;
Settings settings_push = task_cluster->settings_push;
LOG_DEBUG(log, "Execute distributed DROP TABLE: " << query);
LOG_DEBUG(log, "Execute distributed DROP TABLE: {}", query);
/// We have to drop partition_piece on each replica
UInt64 num_nodes = executeQueryOnCluster(
cluster_push, query,
@ -1591,7 +1553,7 @@ void ClusterCopier::dropHelpingTables(const TaskTable & task_table)
PoolMode::GET_MANY,
ClusterExecutionMode::ON_EACH_NODE);
LOG_DEBUG(log, "DROP TABLE query was successfully executed on " << toString(num_nodes) << " nodes.");
LOG_DEBUG(log, "DROP TABLE query was successfully executed on {} nodes.", toString(num_nodes));
}
}
@ -1609,7 +1571,7 @@ void ClusterCopier::dropParticularPartitionPieceFromAllHelpingTables(const TaskT
const ClusterPtr & cluster_push = task_table.cluster_push;
Settings settings_push = task_cluster->settings_push;
LOG_DEBUG(log, "Execute distributed DROP PARTITION: " << query);
LOG_DEBUG(log, "Execute distributed DROP PARTITION: {}", query);
/// We have to drop partition_piece on each replica
UInt64 num_nodes = executeQueryOnCluster(
cluster_push, query,
@ -1618,9 +1580,9 @@ void ClusterCopier::dropParticularPartitionPieceFromAllHelpingTables(const TaskT
PoolMode::GET_MANY,
ClusterExecutionMode::ON_EACH_NODE);
LOG_DEBUG(log, "DROP PARTITION query was successfully executed on " << toString(num_nodes) << " nodes.");
LOG_DEBUG(log, "DROP PARTITION query was successfully executed on {} nodes.", toString(num_nodes));
}
LOG_DEBUG(log, "All helping tables dropped partition " << partition_name);
LOG_DEBUG(log, "All helping tables dropped partition {}", partition_name);
}
String ClusterCopier::getRemoteCreateTable(const DatabaseAndTableName & table, Connection & connection, const Settings * settings)
@ -1724,7 +1686,7 @@ std::set<String> ClusterCopier::getShardPartitions(const ConnectionTimeouts & ti
const auto & settings = context.getSettingsRef();
ASTPtr query_ast = parseQuery(parser_query, query, settings.max_query_size, settings.max_parser_depth);
LOG_DEBUG(log, "Computing destination partition set, executing query: " << query);
LOG_DEBUG(log, "Computing destination partition set, executing query: {}", query);
Context local_context = context;
local_context.setSettings(task_cluster->settings_pull);
@ -1744,7 +1706,7 @@ std::set<String> ClusterCopier::getShardPartitions(const ConnectionTimeouts & ti
}
}
LOG_DEBUG(log, "There are " << res.size() << " destination partitions in shard " << task_shard.getDescription());
LOG_DEBUG(log, "There are {} destination partitions in shard {}", res.size(), task_shard.getDescription());
return res;
}
@ -1765,8 +1727,7 @@ bool ClusterCopier::checkShardHasPartition(const ConnectionTimeouts & timeouts,
query += " LIMIT 1";
LOG_DEBUG(log, "Checking shard " << task_shard.getDescription() << " for partition "
<< partition_quoted_name << " existence, executing query: " << query);
LOG_DEBUG(log, "Checking shard {} for partition {} existence, executing query: {}", task_shard.getDescription(), partition_quoted_name, query);
ParserQuery parser_query(query.data() + query.size());
const auto & settings = context.getSettingsRef();
@ -1805,9 +1766,7 @@ bool ClusterCopier::checkPresentPartitionPiecesOnCurrentShard(const ConnectionTi
query += " LIMIT 1";
LOG_DEBUG(log, "Checking shard " << task_shard.getDescription() << " for partition "
<< partition_quoted_name << " piece " << std::to_string(current_piece_number)
<< "existence, executing query: " << query);
LOG_DEBUG(log, "Checking shard {} for partition {} piece {} existence, executing query: {}", task_shard.getDescription(), partition_quoted_name, std::to_string(current_piece_number), query);
ParserQuery parser_query(query.data() + query.size());
const auto & settings = context.getSettingsRef();
@ -1817,11 +1776,9 @@ bool ClusterCopier::checkPresentPartitionPiecesOnCurrentShard(const ConnectionTi
local_context.setSettings(task_cluster->settings_pull);
auto result = InterpreterFactory::get(query_ast, local_context)->execute().in->read().rows();
if (result != 0)
LOG_DEBUG(log, "Partition " << partition_quoted_name << " piece number "
<< std::to_string(current_piece_number) << " is PRESENT on shard " << task_shard.getDescription());
LOG_DEBUG(log, "Partition {} piece number {} is PRESENT on shard {}", partition_quoted_name, std::to_string(current_piece_number), task_shard.getDescription());
else
LOG_DEBUG(log, "Partition " << partition_quoted_name << " piece number "
<< std::to_string(current_piece_number) << " is ABSENT on shard " << task_shard.getDescription());
LOG_DEBUG(log, "Partition {} piece number {} is ABSENT on shard {}", partition_quoted_name, std::to_string(current_piece_number), task_shard.getDescription());
return result != 0;
}
@ -1938,8 +1895,7 @@ UInt64 ClusterCopier::executeQueryOnCluster(
if (execution_mode == ClusterExecutionMode::ON_EACH_NODE && successful_nodes != origin_replicas_number)
{
LOG_INFO(log, "There was an error while executing ALTER on each node. Query was executed on "
<< toString(successful_nodes) << " nodes. But had to be executed on " << toString(origin_replicas_number.load()));
LOG_INFO(log, "There was an error while executing ALTER on each node. Query was executed on {} nodes. But had to be executed on {}", toString(successful_nodes), toString(origin_replicas_number.load()));
}

View File

@ -94,12 +94,8 @@ void ClusterCopierApp::mainImpl()
StatusFile status_file(process_path + "/status");
ThreadStatus thread_status;
auto log = &logger();
LOG_INFO(log, "Starting clickhouse-copier ("
<< "id " << process_id << ", "
<< "host_id " << host_id << ", "
<< "path " << process_path << ", "
<< "revision " << ClickHouseRevision::get() << ")");
auto * log = &logger();
LOG_INFO(log, "Starting clickhouse-copier (id {}, host_id {}, path {}, revision {})", process_id, host_id, process_path, ClickHouseRevision::get());
SharedContextHolder shared_context = Context::createShared();
auto context = std::make_unique<Context>(Context::createGlobal(shared_context.get()));

View File

@ -260,7 +260,7 @@ ShardPriority getReplicasPriority(const Cluster::Addresses & replicas, const std
return res;
res.is_remote = 1;
for (auto & replica : replicas)
for (const auto & replica : replicas)
{
if (isLocalAddress(DNSResolver::instance().resolveHost(replica.host_name)))
{
@ -270,7 +270,7 @@ ShardPriority getReplicasPriority(const Cluster::Addresses & replicas, const std
}
res.hostname_difference = std::numeric_limits<size_t>::max();
for (auto & replica : replicas)
for (const auto & replica : replicas)
{
size_t difference = getHostNameDifference(local_hostname, replica.host_name);
res.hostname_difference = std::min(difference, res.hostname_difference);

View File

@ -183,11 +183,11 @@ public:
switch (rsp.type)
{
case Coordination::CREATED:
LOG_DEBUG(logger, "CleanStateClock change: CREATED, at " << rsp.path);
LOG_DEBUG(logger, "CleanStateClock change: CREATED, at {}", rsp.path);
stale->store(true);
break;
case Coordination::CHANGED:
LOG_DEBUG(logger, "CleanStateClock change: CHANGED, at" << rsp.path);
LOG_DEBUG(logger, "CleanStateClock change: CHANGED, at {}", rsp.path);
stale->store(true);
}
}

View File

@ -8,7 +8,6 @@
#include <Poco/NullChannel.h>
#include <Databases/DatabaseMemory.h>
#include <Storages/System/attachSystemTables.h>
#include <Interpreters/Context.h>
#include <Interpreters/ProcessList.h>
#include <Interpreters/executeQuery.h>
#include <Interpreters/loadMetadata.h>
@ -212,7 +211,7 @@ try
/// Lock path directory before read
status.emplace(context->getPath() + "status");
LOG_DEBUG(log, "Loading metadata from " << context->getPath());
LOG_DEBUG(log, "Loading metadata from {}", context->getPath());
loadMetadataSystem(*context);
attachSystemTables();
loadMetadata(*context);

View File

@ -4,13 +4,12 @@
#include <Poco/Util/Application.h>
#include <memory>
#include <loggers/Loggers.h>
#include <Interpreters/Context.h>
namespace DB
{
class Context;
/// Lightweight Application for clickhouse-local
/// No networking, no extra configs and working directories, no pid and status files, no dictionaries, no logging.
/// Quiet mode by default

View File

@ -937,10 +937,10 @@ public:
if (typeid_cast<const DataTypeFixedString *>(&data_type))
return std::make_unique<FixedStringModel>(seed);
if (auto type = typeid_cast<const DataTypeArray *>(&data_type))
if (const auto * type = typeid_cast<const DataTypeArray *>(&data_type))
return std::make_unique<ArrayModel>(get(*type->getNestedType(), seed, markov_model_params));
if (auto type = typeid_cast<const DataTypeNullable *>(&data_type))
if (const auto * type = typeid_cast<const DataTypeNullable *>(&data_type))
return std::make_unique<NullableModel>(get(*type->getNestedType(), seed, markov_model_params));
throw Exception("Unsupported data type", ErrorCodes::NOT_IMPLEMENTED);

View File

@ -62,7 +62,7 @@ namespace
void ODBCColumnsInfoHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response)
{
Poco::Net::HTMLForm params(request, request.stream());
LOG_TRACE(log, "Request URI: " + request.getURI());
LOG_TRACE(log, "Request URI: {}", request.getURI());
auto process_error = [&response, this](const std::string & message)
{
@ -89,11 +89,11 @@ void ODBCColumnsInfoHandler::handleRequest(Poco::Net::HTTPServerRequest & reques
if (params.has("schema"))
{
schema_name = params.get("schema");
LOG_TRACE(log, "Will fetch info for table '" << schema_name + "." + table_name << "'");
LOG_TRACE(log, "Will fetch info for table '{}'", schema_name + "." + table_name);
}
else
LOG_TRACE(log, "Will fetch info for table '" << table_name << "'");
LOG_TRACE(log, "Got connection str '" << connection_string << "'");
LOG_TRACE(log, "Will fetch info for table '{}'", table_name);
LOG_TRACE(log, "Got connection str '{}'", connection_string);
try
{
@ -124,7 +124,7 @@ void ODBCColumnsInfoHandler::handleRequest(Poco::Net::HTTPServerRequest & reques
select->format(settings);
std::string query = ss.str();
LOG_TRACE(log, "Inferring structure with query '" << query << "'");
LOG_TRACE(log, "Inferring structure with query '{}'", query);
if (POCO_SQL_ODBC_CLASS::Utility::isError(POCO_SQL_ODBC_CLASS::SQLPrepare(hstmt, reinterpret_cast<SQLCHAR *>(query.data()), query.size())))
throw POCO_SQL_ODBC_CLASS::DescriptorException(session.dbc());

View File

@ -10,7 +10,7 @@ namespace DB
Poco::Net::HTTPRequestHandler * HandlerFactory::createRequestHandler(const Poco::Net::HTTPServerRequest & request)
{
Poco::URI uri{request.getURI()};
LOG_TRACE(log, "Request URI: " + uri.toString());
LOG_TRACE(log, "Request URI: {}", uri.toString());
if (uri.getPath() == "/ping" && request.getMethod() == Poco::Net::HTTPRequest::HTTP_GET)
return new PingHandler(keep_alive_timeout);

View File

@ -25,7 +25,7 @@ namespace DB
void IdentifierQuoteHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response)
{
Poco::Net::HTMLForm params(request, request.stream());
LOG_TRACE(log, "Request URI: " + request.getURI());
LOG_TRACE(log, "Request URI: {}", request.getURI());
auto process_error = [&response, this](const std::string & message)
{

View File

@ -84,7 +84,7 @@ void ODBCHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Ne
Poco::Net::HTMLForm params(request);
if (mode == "read")
params.read(request.stream());
LOG_TRACE(log, "Request URI: " + request.getURI());
LOG_TRACE(log, "Request URI: {}", request.getURI());
if (mode == "read" && !params.has("query"))
{
@ -132,7 +132,7 @@ void ODBCHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Ne
std::string format = params.get("format", "RowBinary");
std::string connection_string = params.get("connection_string");
LOG_TRACE(log, "Connection string: '" << connection_string << "'");
LOG_TRACE(log, "Connection string: '{}'", connection_string);
WriteBufferFromHTTPServerResponse out(request, response, keep_alive_timeout);
@ -152,7 +152,7 @@ void ODBCHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Ne
}
std::string db_name = params.get("db_name");
std::string table_name = params.get("table_name");
LOG_TRACE(log, "DB name: '" << db_name << "', table name: '" << table_name << "'");
LOG_TRACE(log, "DB name: '{}', table name: '{}'", db_name, table_name);
auto quoting_style = IdentifierQuotingStyle::None;
#if USE_ODBC
@ -171,7 +171,7 @@ void ODBCHandler::handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Ne
else
{
std::string query = params.get("query");
LOG_TRACE(log, "Query: " << query);
LOG_TRACE(log, "Query: {}", query);
BlockOutputStreamPtr writer = FormatFactory::instance().getOutput(format, out, *sample_block, context);
auto pool = getPool(connection_string);

View File

@ -24,8 +24,8 @@ namespace
query.table_id.table_name = table_name;
query.columns = std::make_shared<ASTExpressionList>(',');
query.children.push_back(query.columns);
for (size_t i = 0; i < columns.size(); ++i)
query.columns->children.emplace_back(std::make_shared<ASTIdentifier>(columns[i].name));
for (const auto & column : columns)
query.columns->children.emplace_back(std::make_shared<ASTIdentifier>(column.name));
std::stringstream ss;
IAST::FormatSettings settings(ss, true);

View File

@ -48,12 +48,7 @@ namespace
#endif
)
{
LOG_ERROR(log,
"Cannot resolve listen_host (" << host << "), error " << e.code() << ": " << e.message()
<< ". "
"If it is an IPv6 address and your host has disabled IPv6, then consider to "
"specify IPv4 address to listen in <listen_host> element of configuration "
"file. Example: <listen_host>0.0.0.0</listen_host>");
LOG_ERROR(log, "Cannot resolve listen_host ({}), error {}: {}. If it is an IPv6 address and your host has disabled IPv6, then consider to specify IPv4 address to listen in <listen_host> element of configuration file. Example: <listen_host>0.0.0.0</listen_host>", host, e.code(), e.message());
}
throw;
@ -188,7 +183,7 @@ int ODBCBridge::main(const std::vector<std::string> & /*args*/)
new HandlerFactory("ODBCRequestHandlerFactory-factory", keep_alive_timeout, context), server_pool, socket, http_params);
server.start();
LOG_INFO(log, "Listening http://" + address.toString());
LOG_INFO(log, "Listening http://{}", address.toString());
SCOPE_EXIT({
LOG_DEBUG(log, "Received termination signal.");
@ -198,7 +193,7 @@ int ODBCBridge::main(const std::vector<std::string> & /*args*/)
{
if (server.currentConnections() == 0)
break;
LOG_DEBUG(log, "Waiting for " << server.currentConnections() << " connections, try " << count);
LOG_DEBUG(log, "Waiting for {} connections, try {}", server.currentConnections(), count);
std::this_thread::sleep_for(std::chrono::milliseconds(1000));
}
});

View File

@ -195,7 +195,7 @@ void HTTPHandler::pushDelayedResults(Output & used_output)
std::vector<ReadBufferPtr> read_buffers;
std::vector<ReadBuffer *> read_buffers_raw_ptr;
auto cascade_buffer = typeid_cast<CascadeWriteBuffer *>(used_output.out_maybe_delayed_and_compressed.get());
auto * cascade_buffer = typeid_cast<CascadeWriteBuffer *>(used_output.out_maybe_delayed_and_compressed.get());
if (!cascade_buffer)
throw Exception("Expected CascadeWriteBuffer", ErrorCodes::LOGICAL_ERROR);
@ -241,7 +241,7 @@ void HTTPHandler::processQuery(
CurrentThread::QueryScope query_scope(context);
LOG_TRACE(log, "Request URI: " << request.getURI());
LOG_TRACE(log, "Request URI: {}", request.getURI());
std::istream & istr = request.stream();
@ -383,7 +383,7 @@ void HTTPHandler::processQuery(
{
auto push_memory_buffer_and_continue = [next_buffer = used_output.out_maybe_compressed] (const WriteBufferPtr & prev_buf)
{
auto prev_memory_buffer = typeid_cast<MemoryWriteBuffer *>(prev_buf.get());
auto * prev_memory_buffer = typeid_cast<MemoryWriteBuffer *>(prev_buf.get());
if (!prev_memory_buffer)
throw Exception("Expected MemoryWriteBuffer", ErrorCodes::LOGICAL_ERROR);

View File

@ -28,19 +28,16 @@ HTTPRequestHandlerFactoryMain::HTTPRequestHandlerFactoryMain(const std::string &
{
}
Poco::Net::HTTPRequestHandler * HTTPRequestHandlerFactoryMain::createRequestHandler(const Poco::Net::HTTPServerRequest & request) // override
Poco::Net::HTTPRequestHandler * HTTPRequestHandlerFactoryMain::createRequestHandler(const Poco::Net::HTTPServerRequest & request)
{
LOG_TRACE(log, "HTTP Request for " << name << ". "
<< "Method: " << request.getMethod()
<< ", Address: " << request.clientAddress().toString()
<< ", User-Agent: " << (request.has("User-Agent") ? request.get("User-Agent") : "none")
<< (request.hasContentLength() ? (", Length: " + std::to_string(request.getContentLength())) : (""))
<< ", Content Type: " << request.getContentType()
<< ", Transfer Encoding: " << request.getTransferEncoding());
LOG_TRACE(log, "HTTP Request for {}. Method: {}, Address: {}, User-Agent: {}{}, Content Type: {}, Transfer Encoding: {}",
name, request.getMethod(), request.clientAddress().toString(), request.has("User-Agent") ? request.get("User-Agent") : "none",
(request.hasContentLength() ? (", Length: " + std::to_string(request.getContentLength())) : ("")),
request.getContentType(), request.getTransferEncoding());
for (auto & handler_factory : child_factories)
{
auto handler = handler_factory->createRequestHandler(request);
auto * handler = handler_factory->createRequestHandler(request);
if (handler != nullptr)
return handler;
}
@ -72,80 +69,98 @@ HTTPRequestHandlerFactoryMain::TThis * HTTPRequestHandlerFactoryMain::addHandler
/// Builds the main HTTP handler factory called `name` from the rules listed under config section `prefix`.
/// Every key under `prefix` must start with "rule"; the rule's `handler.type` value selects the child factory:
/// "static", "dynamic_query_handler" or "predefined_query_handler".
/// Throws UNKNOWN_ELEMENT_IN_CONFIG for a key that is not a rule and INVALID_CONFIG_PARAMETER
/// when the handler type is empty or not one of the known values.
/// Returns a raw pointer released from the local unique_ptr (presumably the caller becomes responsible for it).
static inline auto createHandlersFactoryFromConfig(IServer & server, const std::string & name, const String & prefix)
{
    /// Held by unique_ptr while the rules are parsed, so an exception thrown below
    /// destroys the factory automatically instead of requiring try/catch + delete.
    auto main_handler_factory = std::make_unique<HTTPRequestHandlerFactoryMain>(name);

    Poco::Util::AbstractConfiguration::Keys keys;
    server.config().keys(prefix, keys);

    for (const auto & key : keys)
    {
        /// Full config path of the current element; built once and reused for lookups and error messages.
        const auto rule_prefix = prefix + "." + key;

        if (!startsWith(key, "rule"))
            throw Exception("Unknown element in config: " + rule_prefix + ", must be 'rule'", ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);

        const auto handler_type = server.config().getString(rule_prefix + ".handler.type", "");

        if (handler_type == "static")
            main_handler_factory->addHandler(createStaticHandlerFactory(server, rule_prefix));
        else if (handler_type == "dynamic_query_handler")
            main_handler_factory->addHandler(createDynamicHandlerFactory(server, rule_prefix));
        else if (handler_type == "predefined_query_handler")
            main_handler_factory->addHandler(createPredefinedHandlerFactory(server, rule_prefix));
        else if (handler_type.empty())
            throw Exception("Handler type in config is not specified here: " +
                rule_prefix + ".handler.type", ErrorCodes::INVALID_CONFIG_PARAMETER);
        else
            throw Exception("Unknown handler type '" + handler_type + "' in config here: " +
                rule_prefix + ".handler.type", ErrorCodes::INVALID_CONFIG_PARAMETER);
    }

    /// Everything parsed successfully: give up ownership and hand the raw pointer back.
    return main_handler_factory.release();
}
static const auto ping_response_expression = "Ok.\n";
static const auto root_response_expression = "config://http_server_default_response";
/// Creates the handler factory for the HTTP interface.
///
/// When the configuration has an "http_handlers" section, the factory is built from it by
/// createHandlersFactoryFromConfig. Otherwise a default set of handlers is registered:
///  - a static handler on the strict path "/" (GET and HEAD), built from root_response_expression;
///  - a static handler on the strict path "/ping" (GET and HEAD), built from ping_response_expression;
///  - ReplicasStatusHandler on the non-strict path "/replicas_status" (GET and HEAD);
///  - DynamicQueryHandler constructed with "query", with allowPostAndGetParamsRequest();
///  - PrometheusRequestHandler on "prometheus.endpoint" (default "/metrics"), only when "prometheus" is
///    configured and "prometheus.port" is absent or 0.
///
/// NOTE(review): this span is a rendered diff hunk. The removed variant (chained raw `new` calls) and the
/// added variant (std::make_unique + release()) appear interleaved without +/- markers.
static inline Poco::Net::HTTPRequestHandlerFactory * createHTTPHandlerFactory(IServer & server, const std::string & name, AsynchronousMetrics & async_metrics)
static inline Poco::Net::HTTPRequestHandlerFactory * createHTTPHandlerFactory(
IServer & server, const std::string & name, AsynchronousMetrics & async_metrics)
{
if (server.config().has("http_handlers"))
return createHandlersFactoryFromConfig(server, name, "http_handlers");
else
{
auto factory = (new HTTPRequestHandlerFactoryMain(name))
->addHandler((new HandlingRuleHTTPHandlerFactory<StaticRequestHandler>(server, root_response_expression))
->attachStrictPath("/")->allowGetAndHeadRequest())
->addHandler((new HandlingRuleHTTPHandlerFactory<StaticRequestHandler>(server, ping_response_expression))
->attachStrictPath("/ping")->allowGetAndHeadRequest())
->addHandler((new HandlingRuleHTTPHandlerFactory<ReplicasStatusHandler>(server))
->attachNonStrictPath("/replicas_status")->allowGetAndHeadRequest())
->addHandler((new HandlingRuleHTTPHandlerFactory<DynamicQueryHandler>(server, "query"))->allowPostAndGetParamsRequest());
auto factory = std::make_unique<HTTPRequestHandlerFactoryMain>(name);
auto root_handler = std::make_unique<HandlingRuleHTTPHandlerFactory<StaticRequestHandler>>(server, root_response_expression);
root_handler->attachStrictPath("/")->allowGetAndHeadRequest();
factory->addHandler(root_handler.release());
auto ping_handler = std::make_unique<HandlingRuleHTTPHandlerFactory<StaticRequestHandler>>(server, ping_response_expression);
ping_handler->attachStrictPath("/ping")->allowGetAndHeadRequest();
factory->addHandler(ping_handler.release());
auto replicas_status_handler = std::make_unique<HandlingRuleHTTPHandlerFactory<ReplicasStatusHandler>>(server);
replicas_status_handler->attachNonStrictPath("/replicas_status")->allowGetAndHeadRequest();
factory->addHandler(replicas_status_handler.release());
auto query_handler = std::make_unique<HandlingRuleHTTPHandlerFactory<DynamicQueryHandler>>(server, "query");
query_handler->allowPostAndGetParamsRequest();
factory->addHandler(query_handler.release());
/// We check that prometheus handler will be served on current (default) port.
/// Otherwise it will be created separately, see below.
if (server.config().has("prometheus") && server.config().getInt("prometheus.port", 0) == 0)
factory->addHandler((new HandlingRuleHTTPHandlerFactory<PrometheusRequestHandler>(
server, PrometheusMetricsWriter(server.config(), "prometheus", async_metrics)))
->attachStrictPath(server.config().getString("prometheus.endpoint", "/metrics"))->allowGetAndHeadRequest());
{
auto prometheus_handler = std::make_unique<HandlingRuleHTTPHandlerFactory<PrometheusRequestHandler>>(
server, PrometheusMetricsWriter(server.config(), "prometheus", async_metrics));
prometheus_handler->attachStrictPath(server.config().getString("prometheus.endpoint", "/metrics"))->allowGetAndHeadRequest();
factory->addHandler(prometheus_handler.release());
}
return factory;
/// The unique_ptr variant hands the factory to the caller as a raw pointer.
return factory.release();
}
}
/// Creates the handler factory for the interserver HTTP interface.
///
/// Registers, in order: a static handler on the strict path "/" and another on the strict path "/ping"
/// (both GET and HEAD), ReplicasStatusHandler on the non-strict path "/replicas_status" (GET and HEAD),
/// and InterserverIOHTTPHandler with allowPostAndGetParamsRequest().
///
/// NOTE(review): this span is a rendered diff hunk. The removed variant (chained raw `new` calls) and the
/// added variant (std::make_unique + release()) appear interleaved without +/- markers.
static inline Poco::Net::HTTPRequestHandlerFactory * createInterserverHTTPHandlerFactory(IServer & server, const std::string & name)
{
return (new HTTPRequestHandlerFactoryMain(name))
->addHandler((new HandlingRuleHTTPHandlerFactory<StaticRequestHandler>(server, root_response_expression))
->attachStrictPath("/")->allowGetAndHeadRequest())
->addHandler((new HandlingRuleHTTPHandlerFactory<StaticRequestHandler>(server, ping_response_expression))
->attachStrictPath("/ping")->allowGetAndHeadRequest())
->addHandler((new HandlingRuleHTTPHandlerFactory<ReplicasStatusHandler>(server))
->attachNonStrictPath("/replicas_status")->allowGetAndHeadRequest())
->addHandler((new HandlingRuleHTTPHandlerFactory<InterserverIOHTTPHandler>(server))->allowPostAndGetParamsRequest());
auto factory = std::make_unique<HTTPRequestHandlerFactoryMain>(name);
auto root_handler = std::make_unique<HandlingRuleHTTPHandlerFactory<StaticRequestHandler>>(server, root_response_expression);
root_handler->attachStrictPath("/")->allowGetAndHeadRequest();
factory->addHandler(root_handler.release());
auto ping_handler = std::make_unique<HandlingRuleHTTPHandlerFactory<StaticRequestHandler>>(server, ping_response_expression);
ping_handler->attachStrictPath("/ping")->allowGetAndHeadRequest();
factory->addHandler(ping_handler.release());
auto replicas_status_handler = std::make_unique<HandlingRuleHTTPHandlerFactory<ReplicasStatusHandler>>(server);
replicas_status_handler->attachNonStrictPath("/replicas_status")->allowGetAndHeadRequest();
factory->addHandler(replicas_status_handler.release());
auto main_handler = std::make_unique<HandlingRuleHTTPHandlerFactory<InterserverIOHTTPHandler>>(server);
main_handler->allowPostAndGetParamsRequest();
factory->addHandler(main_handler.release());
/// The unique_ptr variant hands the factory to the caller as a raw pointer.
return factory.release();
}
Poco::Net::HTTPRequestHandlerFactory * createHandlerFactory(IServer & server, AsynchronousMetrics & async_metrics, const std::string & name)
@ -155,9 +170,14 @@ Poco::Net::HTTPRequestHandlerFactory * createHandlerFactory(IServer & server, As
else if (name == "InterserverIOHTTPHandler-factory" || name == "InterserverIOHTTPSHandler-factory")
return createInterserverHTTPHandlerFactory(server, name);
else if (name == "PrometheusHandler-factory")
return (new HTTPRequestHandlerFactoryMain(name))->addHandler((new HandlingRuleHTTPHandlerFactory<PrometheusRequestHandler>(
server, PrometheusMetricsWriter(server.config(), "prometheus", async_metrics)))
->attachStrictPath(server.config().getString("prometheus.endpoint", "/metrics"))->allowGetAndHeadRequest());
{
auto factory = std::make_unique<HTTPRequestHandlerFactoryMain>(name);
auto handler = std::make_unique<HandlingRuleHTTPHandlerFactory<PrometheusRequestHandler>>(
server, PrometheusMetricsWriter(server.config(), "prometheus", async_metrics));
handler->attachStrictPath(server.config().getString("prometheus.endpoint", "/metrics"))->allowGetAndHeadRequest();
factory->addHandler(handler.release());
return factory.release();
}
throw Exception("LOGICAL ERROR: Unknown HTTP handler factory name.", ErrorCodes::LOGICAL_ERROR);
}

View File

@ -53,7 +53,7 @@ void InterserverIOHTTPHandler::processQuery(Poco::Net::HTTPServerRequest & reque
{
HTMLForm params(request);
LOG_TRACE(log, "Request URI: " << request.getURI());
LOG_TRACE(log, "Request URI: {}", request.getURI());
String endpoint_name = params.get("endpoint");
bool compress = params.get("compress") == "true";
@ -103,7 +103,7 @@ void InterserverIOHTTPHandler::handleRequest(Poco::Net::HTTPServerRequest & requ
response.setStatusAndReason(Poco::Net::HTTPServerResponse::HTTP_UNAUTHORIZED);
if (!response.sent())
writeString(message, *used_output.out);
LOG_WARNING(log, "Query processing failed request: '" << request.getURI() << "' authentication failed");
LOG_WARNING(log, "Query processing failed request: '{}' authentication failed", request.getURI());
}
}
catch (Exception & e)

View File

@ -83,19 +83,15 @@ void MySQLHandler::run()
if (!connection_context.mysql.max_packet_size)
connection_context.mysql.max_packet_size = MAX_PACKET_LENGTH;
LOG_TRACE(log, "Capabilities: " << handshake_response.capability_flags
<< ", max_packet_size: "
<< handshake_response.max_packet_size
<< ", character_set: "
<< static_cast<int>(handshake_response.character_set)
<< ", user: "
<< handshake_response.username
<< ", auth_response length: "
<< handshake_response.auth_response.length()
<< ", database: "
<< handshake_response.database
<< ", auth_plugin_name: "
<< handshake_response.auth_plugin_name);
LOG_TRACE(log,
"Capabilities: {}, max_packet_size: {}, character_set: {}, user: {}, auth_response length: {}, database: {}, auth_plugin_name: {}",
handshake_response.capability_flags,
handshake_response.max_packet_size,
static_cast<int>(handshake_response.character_set),
handshake_response.username,
handshake_response.auth_response.length(),
handshake_response.database,
handshake_response.auth_plugin_name);
client_capability_flags = handshake_response.capability_flags;
if (!(client_capability_flags & CLIENT_PROTOCOL_41))
@ -129,7 +125,9 @@ void MySQLHandler::run()
// For commands which are executed without MemoryTracker.
LimitReadBuffer limited_payload(payload, 10000, true, "too long MySQL packet.");
LOG_DEBUG(log, "Received command: " << static_cast<int>(static_cast<unsigned char>(command)) << ". Connection id: " << connection_id << ".");
LOG_DEBUG(log, "Received command: {}. Connection id: {}.",
static_cast<int>(static_cast<unsigned char>(command)), connection_id);
try
{
switch (command)
@ -197,7 +195,7 @@ void MySQLHandler::finishHandshake(MySQLProtocol::HandshakeResponse & packet)
read_bytes(3); /// We can find out whether it is SSLRequest of HandshakeResponse by first 3 bytes.
size_t payload_size = unalignedLoad<uint32_t>(buf) & 0xFFFFFFu;
LOG_TRACE(log, "payload size: " << payload_size);
LOG_TRACE(log, "payload size: {}", payload_size);
if (payload_size == SSL_REQUEST_PAYLOAD_SIZE)
{
@ -234,18 +232,18 @@ void MySQLHandler::authenticate(const String & user_name, const String & auth_pl
}
catch (const Exception & exc)
{
LOG_ERROR(log, "Authentication for user " << user_name << " failed.");
LOG_ERROR(log, "Authentication for user {} failed.", user_name);
packet_sender->sendPacket(ERR_Packet(exc.code(), "00000", exc.message()), true);
throw;
}
LOG_INFO(log, "Authentication for user " << user_name << " succeeded.");
LOG_INFO(log, "Authentication for user {} succeeded.", user_name);
}
/// Switches the connection's current database.
/// Reads the whole remaining `payload` as the database name, sets it as the current database of
/// `connection_context`, and answers the client with an OK packet.
/// NOTE(review): this span is a rendered diff hunk, so the stream-style and the fmt-style LOG_DEBUG
/// are the removed and added forms of the same statement.
void MySQLHandler::comInitDB(ReadBuffer & payload)
{
String database;
readStringUntilEOF(database, payload);
LOG_DEBUG(log, "Setting current database to " << database);
LOG_DEBUG(log, "Setting current database to {}", database);
connection_context.setCurrentDatabase(database);
packet_sender->sendPacket(OK_Packet(0, client_capability_flags, 0, 0, 1), true);
}

View File

@ -32,7 +32,7 @@ MySQLHandlerFactory::MySQLHandlerFactory(IServer & server_)
}
catch (...)
{
LOG_TRACE(log, "Failed to create SSL context. SSL will be disabled. Error: " << getCurrentExceptionMessage(false));
LOG_TRACE(log, "Failed to create SSL context. SSL will be disabled. Error: {}", getCurrentExceptionMessage(false));
ssl_enabled = false;
}
@ -43,7 +43,7 @@ MySQLHandlerFactory::MySQLHandlerFactory(IServer & server_)
}
catch (...)
{
LOG_TRACE(log, "Failed to read RSA key pair from server certificate. Error: " << getCurrentExceptionMessage(false));
LOG_TRACE(log, "Failed to read RSA key pair from server certificate. Error: {}", getCurrentExceptionMessage(false));
generateRSAKeys();
}
#endif
@ -122,7 +122,7 @@ void MySQLHandlerFactory::generateRSAKeys()
Poco::Net::TCPServerConnection * MySQLHandlerFactory::createConnection(const Poco::Net::StreamSocket & socket)
{
size_t connection_id = last_connection_id++;
LOG_TRACE(log, "MySQL connection. Id: " << connection_id << ". Address: " << socket.peerAddress().toString());
LOG_TRACE(log, "MySQL connection. Id: {}. Address: {}", connection_id, socket.peerAddress().toString());
#if USE_SSL
return new MySQLHandlerSSL(server, socket, ssl_enabled, connection_id, *public_key, *private_key);
#else

View File

@ -46,7 +46,7 @@ void ReplicasStatusHandler::handleRequest(Poco::Net::HTTPServerRequest & request
for (auto iterator = db.second->getTablesIterator(); iterator->isValid(); iterator->next())
{
auto & table = iterator->table();
const auto & table = iterator->table();
StorageReplicatedMergeTree * table_replicated = dynamic_cast<StorageReplicatedMergeTree *>(table.get());
if (!table_replicated)

View File

@ -91,7 +91,7 @@ namespace
void setupTmpPath(Logger * log, const std::string & path)
{
LOG_DEBUG(log, "Setting up " << path << " to store temporary data in it");
LOG_DEBUG(log, "Setting up {} to store temporary data in it", path);
Poco::File(path).createDirectories();
@ -101,11 +101,11 @@ void setupTmpPath(Logger * log, const std::string & path)
{
if (it->isFile() && startsWith(it.name(), "tmp"))
{
LOG_DEBUG(log, "Removing old temporary file " << it->path());
LOG_DEBUG(log, "Removing old temporary file {}", it->path());
it->remove();
}
else
LOG_DEBUG(log, "Skipped file in temporary path " << it->path());
LOG_DEBUG(log, "Skipped file in temporary path {}", it->path());
}
}
@ -276,7 +276,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
{
LOG_TRACE(log, "Will mlockall to prevent executable memory from being paged out. It may take a few seconds.");
if (0 != mlockall(MCL_CURRENT))
LOG_WARNING(log, "Failed mlockall: " + errnoToString(ErrorCodes::SYSTEM_ERROR));
LOG_WARNING(log, "Failed mlockall: {}", errnoToString(ErrorCodes::SYSTEM_ERROR));
else
LOG_TRACE(log, "The memory map of clickhouse executable has been mlock'ed");
}
@ -284,8 +284,8 @@ int Server::main(const std::vector<std::string> & /*args*/)
{
LOG_INFO(log, "It looks like the process has no CAP_IPC_LOCK capability, binary mlock will be disabled."
" It could happen due to incorrect ClickHouse package installation."
" You could resolve the problem manually with 'sudo setcap cap_ipc_lock=+ep " << executable_path << "'."
" Note that it will not work on 'nosuid' mounted filesystems.");
" You could resolve the problem manually with 'sudo setcap cap_ipc_lock=+ep {}'."
" Note that it will not work on 'nosuid' mounted filesystems.", executable_path);
}
}
}
@ -349,7 +349,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
if (rlim.rlim_cur == rlim.rlim_max)
{
LOG_DEBUG(log, "rlimit on number of file descriptors is " << rlim.rlim_cur);
LOG_DEBUG(log, "rlimit on number of file descriptors is {}", rlim.rlim_cur);
}
else
{
@ -357,12 +357,9 @@ int Server::main(const std::vector<std::string> & /*args*/)
rlim.rlim_cur = config().getUInt("max_open_files", rlim.rlim_max);
int rc = setrlimit(RLIMIT_NOFILE, &rlim);
if (rc != 0)
LOG_WARNING(log,
"Cannot set max number of file descriptors to " << rlim.rlim_cur
<< ". Try to specify max_open_files according to your system limits. error: "
<< strerror(errno));
LOG_WARNING(log, "Cannot set max number of file descriptors to {}. Try to specify max_open_files according to your system limits. error: {}", rlim.rlim_cur, strerror(errno));
else
LOG_DEBUG(log, "Set max number of file descriptors to " << rlim.rlim_cur << " (was " << old << ").");
LOG_DEBUG(log, "Set max number of file descriptors to {} (was {}).", rlim.rlim_cur, old);
}
}
@ -372,7 +369,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
/// Initialize DateLUT early, to not interfere with running time of first query.
LOG_DEBUG(log, "Initializing DateLUT.");
DateLUT::instance();
LOG_TRACE(log, "Initialized DateLUT with time zone '" << DateLUT::instance().getTimeZone() << "'.");
LOG_TRACE(log, "Initialized DateLUT with time zone '{}'.", DateLUT::instance().getTimeZone());
/// Storage with temporary data for processing of heavy queries.
@ -431,9 +428,8 @@ int Server::main(const std::vector<std::string> & /*args*/)
if (this_host.empty())
{
this_host = getFQDNOrHostName();
LOG_DEBUG(log,
"Configuration parameter '" + String(host_tag) + "' doesn't exist or exists and empty. Will use '" + this_host
+ "' as replica host.");
LOG_DEBUG(log, "Configuration parameter '{}' doesn't exist or exists and empty. Will use '{}' as replica host.",
host_tag, this_host);
}
String port_str = config().getString(port_tag);
@ -538,8 +534,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
if (uncompressed_cache_size > max_cache_size)
{
uncompressed_cache_size = max_cache_size;
LOG_INFO(log, "Uncompressed cache size was lowered to " << formatReadableSizeWithBinarySuffix(uncompressed_cache_size)
<< " because the system has low amount of memory");
LOG_INFO(log, "Uncompressed cache size was lowered to {} because the system has low amount of memory", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
}
global_context->setUncompressedCache(uncompressed_cache_size);
@ -554,8 +549,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
if (mark_cache_size > max_cache_size)
{
mark_cache_size = max_cache_size;
/// Report the lowered mark cache size itself; uncompressed_cache_size was passed here by mistake
/// (copied from the uncompressed cache branch), so the message showed the wrong value.
LOG_INFO(log, "Mark cache size was lowered to " << formatReadableSizeWithBinarySuffix(mark_cache_size)
<< " because the system has low amount of memory");
LOG_INFO(log, "Mark cache size was lowered to {} because the system has low amount of memory", formatReadableSizeWithBinarySuffix(mark_cache_size));
}
global_context->setMarkCache(mark_cache_size);
@ -579,20 +573,19 @@ int Server::main(const std::vector<std::string> & /*args*/)
if (max_server_memory_usage == 0)
{
max_server_memory_usage = default_max_server_memory_usage;
LOG_INFO(log, "Setting max_server_memory_usage was set to " << formatReadableSizeWithBinarySuffix(max_server_memory_usage));
LOG_INFO(log, "Setting max_server_memory_usage was set to {}", formatReadableSizeWithBinarySuffix(max_server_memory_usage));
}
else if (max_server_memory_usage > default_max_server_memory_usage)
{
max_server_memory_usage = default_max_server_memory_usage;
LOG_INFO(log, "Setting max_server_memory_usage was lowered to " << formatReadableSizeWithBinarySuffix(max_server_memory_usage)
<< " because the system has low amount of memory");
LOG_INFO(log, "Setting max_server_memory_usage was lowered to {} because the system has low amount of memory", formatReadableSizeWithBinarySuffix(max_server_memory_usage));
}
total_memory_tracker.setOrRaiseHardLimit(max_server_memory_usage);
total_memory_tracker.setDescription("(total)");
total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking);
LOG_INFO(log, "Loading metadata from " + path);
LOG_INFO(log, "Loading metadata from {}", path);
try
{
@ -694,17 +687,19 @@ int Server::main(const std::vector<std::string> & /*args*/)
" neither clickhouse-server process has CAP_NET_ADMIN capability."
" 'taskstats' performance statistics will be disabled."
" It could happen due to incorrect ClickHouse package installation."
" You can try to resolve the problem manually with 'sudo setcap cap_net_admin=+ep " << executable_path << "'."
" You can try to resolve the problem manually with 'sudo setcap cap_net_admin=+ep {}'."
" Note that it will not work on 'nosuid' mounted filesystems."
" It also doesn't work if you run clickhouse-server inside network namespace as it happens in some containers.");
" It also doesn't work if you run clickhouse-server inside network namespace as it happens in some containers.",
executable_path);
}
if (!hasLinuxCapability(CAP_SYS_NICE))
{
LOG_INFO(log, "It looks like the process has no CAP_SYS_NICE capability, the setting 'os_thread_nice' will have no effect."
" It could happen due to incorrect ClickHouse package installation."
" You could resolve the problem manually with 'sudo setcap cap_sys_nice=+ep " << executable_path << "'."
" Note that it will not work on 'nosuid' mounted filesystems.");
" You could resolve the problem manually with 'sudo setcap cap_sys_nice=+ep {}'."
" Note that it will not work on 'nosuid' mounted filesystems.",
executable_path);
}
#else
LOG_INFO(log, "TaskStats is not implemented for this OS. IO accounting will be disabled.");
@ -746,11 +741,11 @@ int Server::main(const std::vector<std::string> & /*args*/)
#endif
)
{
LOG_ERROR(log,
"Cannot resolve listen_host (" << host << "), error " << e.code() << ": " << e.message() << ". "
LOG_ERROR(log, "Cannot resolve listen_host ({}), error {}: {}. "
"If it is an IPv6 address and your host has disabled IPv6, then consider to "
"specify IPv4 address to listen in <listen_host> element of configuration "
"file. Example: <listen_host>0.0.0.0</listen_host>");
"file. Example: <listen_host>0.0.0.0</listen_host>",
host, e.code(), e.message());
}
throw;
@ -802,11 +797,11 @@ int Server::main(const std::vector<std::string> & /*args*/)
if (listen_try)
{
LOG_ERROR(log, message
<< ". If it is an IPv6 or IPv4 address and your host has disabled IPv6 or IPv4, then consider to "
LOG_ERROR(log, "{}. If it is an IPv6 or IPv4 address and your host has disabled IPv6 or IPv4, then consider to "
"specify not disabled IPv4 or IPv6 address to listen in <listen_host> element of configuration "
"file. Example for disabled IPv6: <listen_host>0.0.0.0</listen_host> ."
" Example for disabled IPv4: <listen_host>::</listen_host>");
" Example for disabled IPv4: <listen_host>::</listen_host>",
message);
}
else
{
@ -826,7 +821,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
servers.emplace_back(std::make_unique<Poco::Net::HTTPServer>(
createHandlerFactory(*this, async_metrics, "HTTPHandler-factory"), server_pool, socket, http_params));
LOG_INFO(log, "Listening for http://" + address.toString());
LOG_INFO(log, "Listening for http://{}", address.toString());
});
/// HTTPS
@ -840,7 +835,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
servers.emplace_back(std::make_unique<Poco::Net::HTTPServer>(
createHandlerFactory(*this, async_metrics, "HTTPSHandler-factory"), server_pool, socket, http_params));
LOG_INFO(log, "Listening for https://" + address.toString());
LOG_INFO(log, "Listening for https://{}", address.toString());
#else
UNUSED(port);
throw Exception{"HTTPS protocol is disabled because Poco library was built without NetSSL support.",
@ -861,7 +856,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
socket,
new Poco::Net::TCPServerParams));
LOG_INFO(log, "Listening for connections with native protocol (tcp): " + address.toString());
LOG_INFO(log, "Listening for connections with native protocol (tcp): {}", address.toString());
});
/// TCP with SSL
@ -877,7 +872,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
server_pool,
socket,
new Poco::Net::TCPServerParams));
LOG_INFO(log, "Listening for connections with secure native protocol (tcp_secure): " + address.toString());
LOG_INFO(log, "Listening for connections with secure native protocol (tcp_secure): {}", address.toString());
#else
UNUSED(port);
throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.",
@ -895,7 +890,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
servers.emplace_back(std::make_unique<Poco::Net::HTTPServer>(
createHandlerFactory(*this, async_metrics, "InterserverIOHTTPHandler-factory"), server_pool, socket, http_params));
LOG_INFO(log, "Listening for replica communication (interserver): http://" + address.toString());
LOG_INFO(log, "Listening for replica communication (interserver): http://{}", address.toString());
});
create_server("interserver_https_port", [&](UInt16 port)
@ -908,7 +903,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
servers.emplace_back(std::make_unique<Poco::Net::HTTPServer>(
createHandlerFactory(*this, async_metrics, "InterserverIOHTTPSHandler-factory"), server_pool, socket, http_params));
LOG_INFO(log, "Listening for secure replica communication (interserver): https://" + address.toString());
LOG_INFO(log, "Listening for secure replica communication (interserver): https://{}", address.toString());
#else
UNUSED(port);
throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.",
@ -928,7 +923,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
socket,
new Poco::Net::TCPServerParams));
LOG_INFO(log, "Listening for MySQL compatibility protocol: " + address.toString());
LOG_INFO(log, "Listening for MySQL compatibility protocol: {}", address.toString());
});
/// Prometheus (if defined and not setup yet with http_port)
@ -941,7 +936,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
servers.emplace_back(std::make_unique<Poco::Net::HTTPServer>(
createHandlerFactory(*this, async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params));
LOG_INFO(log, "Listening for Prometheus: http://" + address.toString());
LOG_INFO(log, "Listening for Prometheus: http://{}", address.toString());
});
}
@ -966,12 +961,10 @@ int Server::main(const std::vector<std::string> & /*args*/)
dns_cache_updater->start();
{
std::stringstream message;
message << "Available RAM: " << formatReadableSizeWithBinarySuffix(memory_amount) << ";"
<< " physical cores: " << getNumberOfPhysicalCPUCores() << ";"
// on ARM processors it can show only enabled at current moment cores
<< " logical cores: " << std::thread::hardware_concurrency() << ".";
LOG_INFO(log, message.str());
LOG_INFO(log, "Available RAM: {}; physical cores: {}; logical cores: {}.",
formatReadableSizeWithBinarySuffix(memory_amount),
getNumberOfPhysicalCPUCores(), // on ARM processors it can show only enabled at current moment cores
std::thread::hardware_concurrency());
}
LOG_INFO(log, "Ready for connections.");
@ -989,9 +982,10 @@ int Server::main(const std::vector<std::string> & /*args*/)
current_connections += server->currentConnections();
}
LOG_INFO(log,
"Closed all listening sockets."
<< (current_connections ? " Waiting for " + toString(current_connections) + " outstanding connections." : ""));
if (current_connections)
LOG_INFO(log, "Closed all listening sockets. Waiting for {} outstanding connections.", current_connections);
else
LOG_INFO(log, "Closed all listening sockets.");
/// Killing remaining queries.
global_context->getProcessList().killAllQueries();
@ -1013,9 +1007,11 @@ int Server::main(const std::vector<std::string> & /*args*/)
}
}
LOG_INFO(
log, "Closed connections." << (current_connections ? " But " + toString(current_connections) + " remains."
" Tip: To increase wait time add to config: <shutdown_wait_unfinished>60</shutdown_wait_unfinished>" : ""));
if (current_connections)
LOG_INFO(log, "Closed connections. But {} remain."
" Tip: To increase wait time add to config: <shutdown_wait_unfinished>60</shutdown_wait_unfinished>", current_connections);
else
LOG_INFO(log, "Closed connections.");
dns_cache_updater.reset();
main_config_reloader.reset();

View File

@ -28,7 +28,7 @@
#include <Compression/CompressionFactory.h>
#include <common/logger_useful.h>
#include <Processors/Executors/PullingPipelineExecutor.h>
#include <Processors/Executors/PullingAsyncPipelineExecutor.h>
#include "TCPHandler.h"
@ -115,8 +115,7 @@ void TCPHandler::runImpl()
if (!DatabaseCatalog::instance().isDatabaseExist(default_database))
{
Exception e("Database " + backQuote(default_database) + " doesn't exist", ErrorCodes::UNKNOWN_DATABASE);
LOG_ERROR(log, "Code: " << e.code() << ", e.displayText() = " << e.displayText()
<< ", Stack trace:\n\n" << e.getStackTraceString());
LOG_ERROR(log, "Code: {}, e.displayText() = {}, Stack trace:\n\n{}", e.code(), e.displayText(), e.getStackTraceString());
sendException(e, connection_context.getSettingsRef().calculate_text_stack_trace);
return;
}
@ -278,8 +277,11 @@ void TCPHandler::runImpl()
sendLogs();
sendEndOfStream();
query_scope.reset();
/// QueryState should be cleared before QueryScope, since otherwise
/// the MemoryTracker will be wrong for possible deallocations.
/// (e.g. deallocations from the Aggregator with two-level aggregation)
state.reset();
query_scope.reset();
}
catch (const Exception & e)
{
@ -359,8 +361,11 @@ void TCPHandler::runImpl()
try
{
query_scope.reset();
/// QueryState should be cleared before QueryScope, since otherwise
/// the MemoryTracker will be wrong for possible deallocations.
/// (e.g. deallocations from the Aggregator with two-level aggregation)
state.reset();
query_scope.reset();
}
catch (...)
{
@ -373,8 +378,7 @@ void TCPHandler::runImpl()
watch.stop();
LOG_INFO(log, std::fixed << std::setprecision(3)
<< "Processed in " << watch.elapsedSeconds() << " sec.");
LOG_INFO(log, "Processed in {} sec.", watch.elapsedSeconds());
/// It is important to destroy query context here. We do not want it to live arbitrarily longer than the query.
query_context.reset();
@ -560,7 +564,7 @@ void TCPHandler::processOrdinaryQueryWithProcessors()
}
{
PullingPipelineExecutor executor(pipeline);
PullingAsyncPipelineExecutor executor(pipeline);
CurrentMetrics::Increment query_thread_metric_increment{CurrentMetrics::QueryThread};
Block block;
@ -726,14 +730,12 @@ void TCPHandler::receiveHello()
readStringBinary(user, *in);
readStringBinary(password, *in);
LOG_DEBUG(log, "Connected " << client_name
<< " version " << client_version_major
<< "." << client_version_minor
<< "." << client_version_patch
<< ", revision: " << client_revision
<< (!default_database.empty() ? ", database: " + default_database : "")
<< (!user.empty() ? ", user: " + user : "")
<< ".");
LOG_DEBUG(log, "Connected {} version {}.{}.{}, revision: {}{}{}.",
client_name,
client_version_major, client_version_minor, client_version_patch,
client_revision,
(!default_database.empty() ? ", database: " + default_database : ""),
(!user.empty() ? ", user: " + user : ""));
connection_context.setUser(user, password, socket().peerAddress());
}
@ -1199,8 +1201,7 @@ void TCPHandler::run()
/// Timeout - not an error.
if (!strcmp(e.what(), "Timeout"))
{
LOG_DEBUG(log, "Poco::Exception. Code: " << ErrorCodes::POCO_EXCEPTION << ", e.code() = " << e.code()
<< ", e.displayText() = " << e.displayText() << ", e.what() = " << e.what());
LOG_DEBUG(log, "Poco::Exception. Code: {}, e.code() = {}, e.displayText() = {}, e.what() = {}", ErrorCodes::POCO_EXCEPTION, e.code(), e.displayText(), e.what());
}
else
throw;

View File

@ -35,7 +35,7 @@ public:
{
try
{
LOG_TRACE(log, "TCP Request. Address: " << socket.peerAddress().toString());
LOG_TRACE(log, "TCP Request. Address: {}", socket.peerAddress().toString());
return new TCPHandler(server, socket);
}
catch (const Poco::Net::NetException &)

View File

@ -227,7 +227,7 @@
and to prevent clickhouse executable from being paged out under high IO load.
Enabling this option is recommended but will lead to increased startup time for up to a few seconds.
-->
<mlock_executable>false</mlock_executable>
<mlock_executable>true</mlock_executable>
<!-- Configuration of clusters that could be used in Distributed tables.
https://clickhouse.tech/docs/en/operations/table_engines/distributed/
@ -405,6 +405,9 @@
</prometheus>
-->
<!-- Lazy system.*_log table creation -->
<!-- <system_tables_lazy_load>false</system_tables_lazy_load> -->
<!-- Query log. Used only for queries with setting log_queries = 1. -->
<query_log>
<!-- What table to insert data. If table is not exist, it will be created.

View File

@ -251,12 +251,11 @@ public:
void logTree(Poco::Logger * log) const
{
LOG_TRACE(log, "Tree(" << level << "): name=" << (node_name ? *node_name : "NULL")
<< ", access=" << access.toString()
<< ", final_access=" << final_access.toString()
<< ", min_access=" << min_access.toString()
<< ", max_access=" << max_access.toString()
<< ", num_children=" << (children ? children->size() : 0));
LOG_TRACE(log, "Tree({}): name={}, access={}, final_access={}, min_access={}, max_access={}, num_children={}",
level, node_name ? *node_name : "NULL", access.toString(),
final_access.toString(), min_access.toString(), max_access.toString(),
(children ? children->size() : 0));
if (children)
{
for (auto & child : *children | boost::adaptors::map_values)

View File

@ -310,8 +310,8 @@ bool AllowedClientHosts::contains(const IPAddress & client_address) const
/// Try to ignore DNS errors: if host cannot be resolved, skip it and try next.
LOG_WARNING(
&Logger::get("AddressPatterns"),
"Failed to check if the allowed client hosts contain address " << client_address.toString() << ". " << e.displayText()
<< ", code = " << e.code());
"Failed to check if the allowed client hosts contain address {}. {}, code = {}",
client_address.toString(), e.displayText(), e.code());
return false;
}
};
@ -343,8 +343,8 @@ bool AllowedClientHosts::contains(const IPAddress & client_address) const
/// Try to ignore DNS errors: if host cannot be resolved, skip it and try next.
LOG_WARNING(
&Logger::get("AddressPatterns"),
"Failed to check if the allowed client hosts contain address " << client_address.toString() << ". " << e.displayText()
<< ", code = " << e.code());
"Failed to check if the allowed client hosts contain address {}. {}, code = {}",
client_address.toString(), e.displayText(), e.code());
return false;
}
};

View File

@ -200,7 +200,7 @@ bool ContextAccess::calculateResultAccessAndCheck(Poco::Logger * log_, const Acc
bool is_granted = access->isGranted(flags, args...);
if (trace_log)
LOG_TRACE(trace_log, "Access " << (is_granted ? "granted" : "denied") << ": " << (AccessRightsElement{flags, args...}.toString()));
LOG_TRACE(trace_log, "Access {}: {}", (is_granted ? "granted" : "denied"), (AccessRightsElement{flags, args...}.toString()));
if (is_granted)
return true;
@ -219,7 +219,7 @@ bool ContextAccess::calculateResultAccessAndCheck(Poco::Logger * log_, const Acc
if constexpr (mode == THROW_IF_ACCESS_DENIED)
throw Exception(user_name + ": " + msg, error_code);
else if constexpr (mode == LOG_WARNING_IF_ACCESS_DENIED)
LOG_WARNING(log_, user_name + ": " + msg + formatSkippedMessage(args...));
LOG_WARNING(log_, "{}: {}{}", user_name, msg, formatSkippedMessage(args...));
};
if (!user)
@ -451,15 +451,18 @@ boost::shared_ptr<const AccessRights> ContextAccess::calculateResultAccess(bool
if (trace_log && (params.readonly == readonly_) && (params.allow_ddl == allow_ddl_) && (params.allow_introspection == allow_introspection_))
{
LOG_TRACE(trace_log, "List of all grants: " << merged_access->toString() << (grant_option ? " WITH GRANT OPTION" : ""));
if (grant_option)
LOG_TRACE(trace_log, "List of all grants: {} WITH GRANT OPTION", merged_access->toString());
else
LOG_TRACE(trace_log, "List of all grants: {}", merged_access->toString());
if (roles_info && !roles_info->getCurrentRolesNames().empty())
{
LOG_TRACE(
trace_log,
"Current_roles: " << boost::algorithm::join(roles_info->getCurrentRolesNames(), ", ")
<< ", enabled_roles: " << boost::algorithm::join(roles_info->getEnabledRolesNames(), ", "));
LOG_TRACE(trace_log, "Current_roles: {}, enabled_roles: {}",
boost::algorithm::join(roles_info->getCurrentRolesNames(), ", "),
boost::algorithm::join(roles_info->getEnabledRolesNames(), ", "));
}
LOG_TRACE(trace_log, "Settings: readonly=" << readonly_ << ", allow_ddl=" << allow_ddl_ << ", allow_introspection_functions=" << allow_introspection_);
LOG_TRACE(trace_log, "Settings: readonly={}, allow_ddl={}, allow_introspection_functions={}", readonly_, allow_ddl_, allow_introspection_);
}
res = std::move(merged_access);

View File

@ -367,7 +367,7 @@ bool DiskAccessStorage::readLists()
auto file_path = getListFilePath(directory_path, type);
if (!std::filesystem::exists(file_path))
{
LOG_WARNING(getLogger(), "File " + file_path.string() + " doesn't exist");
LOG_WARNING(getLogger(), "File {} doesn't exist", file_path.string());
ok = false;
break;
}
@ -496,7 +496,7 @@ void DiskAccessStorage::listsWritingThreadFunc()
/// and then saves the files "users.list", "roles.list", etc. to the same directory.
bool DiskAccessStorage::rebuildLists()
{
LOG_WARNING(getLogger(), "Recovering lists in directory " + directory_path);
LOG_WARNING(getLogger(), "Recovering lists in directory {}", directory_path);
clear();
for (const auto & directory_entry : std::filesystem::directory_iterator(directory_path))

View File

@ -4,6 +4,7 @@
#include <AggregateFunctions/IAggregateFunction.h>
#include <Columns/ColumnNullable.h>
#include <Common/assert_cast.h>
#include <Columns/ColumnsCommon.h>
#include <DataTypes/DataTypeNullable.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
@ -53,13 +54,13 @@ protected:
static void initFlag(AggregateDataPtr place) noexcept
{
if (result_is_nullable)
if constexpr (result_is_nullable)
place[0] = 0;
}
static void setFlag(AggregateDataPtr place) noexcept
{
if (result_is_nullable)
if constexpr (result_is_nullable)
place[0] = 1;
}
@ -72,7 +73,7 @@ public:
AggregateFunctionNullBase(AggregateFunctionPtr nested_function_, const DataTypes & arguments, const Array & params)
: IAggregateFunctionHelper<Derived>(arguments, params), nested_function{nested_function_}
{
if (result_is_nullable)
if constexpr (result_is_nullable)
prefix_size = nested_function->alignOfData();
else
prefix_size = 0;
@ -128,7 +129,7 @@ public:
void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
{
bool flag = getFlag(place);
if (result_is_nullable)
if constexpr (result_is_nullable)
writeBinary(flag, buf);
if (flag)
nested_function->serialize(nestedPlace(place), buf);
@ -137,7 +138,7 @@ public:
void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override
{
bool flag = 1;
if (result_is_nullable)
if constexpr (result_is_nullable)
readBinary(flag, buf);
if (flag)
{
@ -148,7 +149,7 @@ public:
void insertResultInto(AggregateDataPtr place, IColumn & to) const override
{
if (result_is_nullable)
if constexpr (result_is_nullable)
{
ColumnNullable & to_concrete = assert_cast<ColumnNullable &>(to);
if (getFlag(place))
@ -194,13 +195,26 @@ public:
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
const ColumnNullable * column = assert_cast<const ColumnNullable *>(columns[0]);
const IColumn * nested_column = &column->getNestedColumn();
if (!column->isNullAt(row_num))
{
this->setFlag(place);
const IColumn * nested_column = &column->getNestedColumn();
this->nested_function->add(this->nestedPlace(place), &nested_column, row_num, arena);
}
}
/// Batch version of add() for a single aggregation state.
/// The nested function receives the nested column together with the null map and
/// is asked to aggregate only the rows that are not NULL.
void addBatchSinglePlace(size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena * arena) const override
{
    const ColumnNullable * nullable_column = assert_cast<const ColumnNullable *>(columns[0]);
    const UInt8 * null_map = nullable_column->getNullMapData().data();
    const IColumn * nested_column = &nullable_column->getNestedColumn();

    this->nested_function->addBatchSinglePlaceNotNull(batch_size, this->nestedPlace(place), &nested_column, null_map, arena);

    if constexpr (result_is_nullable)
    {
        /// The flag is raised unless memoryIsByte reports that the whole null map consists of ones
        /// (presumably: every row of the batch is NULL).
        const bool null_map_is_all_ones = memoryIsByte(null_map, batch_size, 1);
        if (!null_map_is_all_ones)
            this->setFlag(place);
    }
}
};

View File

@ -20,11 +20,72 @@ struct AggregateFunctionSumData
{
T sum{};
void add(T value)
void ALWAYS_INLINE add(T value)
{
sum += value;
}
/// Vectorized version: adds `count` values starting at `ptr` to `sum`.
template <typename Value>
void NO_INLINE addMany(const Value * __restrict ptr, size_t count)
{
    /// The compiler cannot unroll this loop by itself
    /// (at least for floats, most likely due to the lack of -fassociative-math),
    /// so several independent accumulators are kept manually.
    /// Their number is something around the number of SSE registers * the number of elements that fit in a register.
    constexpr size_t unroll_count = 128 / sizeof(T);
    T partial_sums[unroll_count]{};

    const Value * const end = ptr + count;
    const Value * const unrolled_end = ptr + (count / unroll_count * unroll_count);

    /// Main part: whole groups of unroll_count values, one accumulator per position in the group.
    for (; ptr < unrolled_end; ptr += unroll_count)
        for (size_t lane = 0; lane < unroll_count; ++lane)
            partial_sums[lane] += ptr[lane];

    /// Fold the accumulators into the total, in index order.
    for (size_t lane = 0; lane < unroll_count; ++lane)
        sum += partial_sums[lane];

    /// Tail: fewer than unroll_count values are left.
    for (; ptr < end; ++ptr)
        sum += *ptr;
}
/// Same as addMany, but a value is added only if the corresponding byte of `null_map` is zero.
template <typename Value>
void NO_INLINE addManyNotNull(const Value * __restrict ptr, const UInt8 * __restrict null_map, size_t count)
{
    constexpr size_t unroll_count = 128 / sizeof(T);
    T partial_sums[unroll_count]{};

    const Value * const end = ptr + count;
    const Value * const unrolled_end = ptr + (count / unroll_count * unroll_count);

    /// Main part: `ptr` and `null_map` advance together, one group of unroll_count rows at a time.
    for (; ptr < unrolled_end; ptr += unroll_count, null_map += unroll_count)
        for (size_t lane = 0; lane < unroll_count; ++lane)
            if (!null_map[lane])
                partial_sums[lane] += ptr[lane];

    /// Fold the accumulators into the total, in index order.
    for (size_t lane = 0; lane < unroll_count; ++lane)
        sum += partial_sums[lane];

    /// Tail: fewer than unroll_count rows are left.
    for (; ptr < end; ++ptr, ++null_map)
        if (!*null_map)
            sum += *ptr;
}
void merge(const AggregateFunctionSumData & rhs)
{
sum += rhs.sum;
@ -55,21 +116,95 @@ struct AggregateFunctionSumKahanData
T sum{};
T compensation{};
void add(T value)
template <typename Value>
void ALWAYS_INLINE addImpl(Value value, T & out_sum, T & out_compensation)
{
auto compensated_value = value - compensation;
auto new_sum = sum + compensated_value;
compensation = (new_sum - sum) - compensated_value;
sum = new_sum;
auto compensated_value = value - out_compensation;
auto new_sum = out_sum + compensated_value;
out_compensation = (new_sum - out_sum) - compensated_value;
out_sum = new_sum;
}
/// Adds a single value to the state, updating both the running sum and its compensation term.
void ALWAYS_INLINE add(T value)
{
    this->addImpl(value, this->sum, this->compensation);
}
/// Vectorized version: adds `count` values starting at `ptr` using compensated summation.
template <typename Value>
void NO_INLINE addMany(const Value * __restrict ptr, size_t count)
{
    /// Fewer accumulators than in the ordinary sum, because the algorithm is more complicated
    /// and too large loop unrolling is questionable. But this is just a guess.
    constexpr size_t unroll_count = 4;
    T partial_sums[unroll_count]{};
    T partial_compensations[unroll_count]{};

    const Value * const end = ptr + count;
    const Value * const unrolled_end = ptr + (count / unroll_count * unroll_count);

    /// Main part: every position in the group has its own (sum, compensation) pair.
    for (; ptr < unrolled_end; ptr += unroll_count)
        for (size_t lane = 0; lane < unroll_count; ++lane)
            addImpl(ptr[lane], partial_sums[lane], partial_compensations[lane]);

    /// Merge the partial states into the main state, in index order.
    for (size_t lane = 0; lane < unroll_count; ++lane)
        mergeImpl(sum, compensation, partial_sums[lane], partial_compensations[lane]);

    /// Tail: fewer than unroll_count values are left, add them directly to the main state.
    for (; ptr < end; ++ptr)
        addImpl(*ptr, sum, compensation);
}
/// Same as addMany, but a value is added only if the corresponding byte of `null_map` is zero.
template <typename Value>
void NO_INLINE addManyNotNull(const Value * __restrict ptr, const UInt8 * __restrict null_map, size_t count)
{
    constexpr size_t unroll_count = 4;
    T partial_sums[unroll_count]{};
    T partial_compensations[unroll_count]{};

    const Value * const end = ptr + count;
    const Value * const unrolled_end = ptr + (count / unroll_count * unroll_count);

    /// Main part: `ptr` and `null_map` advance together, one group of unroll_count rows at a time.
    for (; ptr < unrolled_end; ptr += unroll_count, null_map += unroll_count)
        for (size_t lane = 0; lane < unroll_count; ++lane)
            if (!null_map[lane])
                addImpl(ptr[lane], partial_sums[lane], partial_compensations[lane]);

    /// Merge the partial states into the main state, in index order.
    for (size_t lane = 0; lane < unroll_count; ++lane)
        mergeImpl(sum, compensation, partial_sums[lane], partial_compensations[lane]);

    /// Tail: fewer than unroll_count rows are left.
    for (; ptr < end; ++ptr, ++null_map)
        if (!*null_map)
            addImpl(*ptr, sum, compensation);
}
/// Merges the compensated-sum state (from_sum, from_compensation)
/// into the state (to_sum, to_compensation), which is updated in place.
/// The function works only with its arguments: the destination does not have to be
/// the `sum` / `compensation` members of this object.
void ALWAYS_INLINE mergeImpl(T & to_sum, T & to_compensation, T from_sum, T from_compensation)
{
    auto raw_sum = to_sum + from_sum;
    auto rhs_compensated = raw_sum - to_sum;
    /// Kahan summation is tricky because it depends on non-associativity of float arithmetic.
    /// Do not simplify this expression if you are not sure.
    /// Note: the compensation of the destination is taken from `to_compensation`, not from the member,
    /// otherwise merging into any state other than this object's own would use the wrong term.
    auto compensations = ((from_sum - rhs_compensated) + (to_sum - (raw_sum - rhs_compensated))) + to_compensation + from_compensation;
    to_sum = raw_sum + compensations;
    to_compensation = compensations - (to_sum - raw_sum);
}
void merge(const AggregateFunctionSumKahanData & rhs)
{
auto raw_sum = sum + rhs.sum;
auto rhs_compensated = raw_sum - sum;
auto compensations = ((rhs.sum - rhs_compensated) + (sum - (raw_sum - rhs_compensated))) + compensation + rhs.compensation;
sum = raw_sum + compensations;
compensation = compensations - (sum - raw_sum);
mergeImpl(sum, compensation, rhs.sum, rhs.compensation);
}
void write(WriteBuffer & buf) const
@ -141,6 +276,20 @@ public:
this->data(place).add(column.getData()[row_num]);
}
/// Vectorized version when there is no GROUP BY keys.
void addBatchSinglePlace(size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena *) const override
{
const auto & column = static_cast<const ColVecType &>(*columns[0]);
this->data(place).addMany(column.getData().data(), batch_size);
}
void addBatchSinglePlaceNotNull(
size_t batch_size, AggregateDataPtr place, const IColumn ** columns, const UInt8 * null_map, Arena *) const override
{
const auto & column = static_cast<const ColVecType &>(*columns[0]);
this->data(place).addManyNotNull(column.getData().data(), null_map, batch_size);
}
void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
{
this->data(place).merge(this->data(rhs));

View File

@ -145,6 +145,11 @@ public:
*/
virtual void addBatchSinglePlace(size_t batch_size, AggregateDataPtr place, const IColumn ** columns, Arena * arena) const = 0;
/** The same as addBatchSinglePlace, but for the case when only a part of the rows must be aggregated:
  * `null_map` holds one byte per row of the batch, and a row is aggregated only if its byte is zero.
*/
virtual void addBatchSinglePlaceNotNull(
size_t batch_size, AggregateDataPtr place, const IColumn ** columns, const UInt8 * null_map, Arena * arena) const = 0;
/** In addition to addBatch, this method collects multiple rows of arguments into array "places"
* as long as they are between offsets[i-1] and offsets[i]. This is used for arrayReduce and
* -Array combinator. It might also be used generally to break data dependency when array
@ -201,6 +206,14 @@ public:
static_cast<const Derived *>(this)->add(place, columns, i, arena);
}
void addBatchSinglePlaceNotNull(
size_t batch_size, AggregateDataPtr place, const IColumn ** columns, const UInt8 * null_map, Arena * arena) const override
{
for (size_t i = 0; i < batch_size; ++i)
if (!null_map[i])
static_cast<const Derived *>(this)->add(place, columns, i, arena);
}
void addBatchArray(
size_t batch_size, AggregateDataPtr * places, size_t place_offset, const IColumn ** columns, const UInt64 * offsets, Arena * arena)
const override

View File

@ -103,6 +103,7 @@ class QuantileTDigest
struct RadixSortTraits
{
using Element = Centroid;
using Result = Element;
using Key = Value;
using CountType = UInt32;
using KeyBits = UInt32;
@ -114,6 +115,7 @@ class QuantileTDigest
/// The function to get the key from an array element.
static Key & extractKey(Element & elem) { return elem.mean; }
static Result & extractResult(Element & elem) { return elem; }
};
/** Adds a centroid `c` to the digest

View File

@ -61,11 +61,11 @@ void Connection::connect(const ConnectionTimeouts & timeouts)
if (connected)
disconnect();
LOG_TRACE(log_wrapper.get(), "Connecting. Database: "
<< (default_database.empty() ? "(not specified)" : default_database)
<< ". User: " << user
<< (static_cast<bool>(secure) ? ". Secure" : "")
<< (static_cast<bool>(compression) ? "" : ". Uncompressed"));
LOG_TRACE(log_wrapper.get(), "Connecting. Database: {}. User: {}{}{}",
default_database.empty() ? "(not specified)" : default_database,
user,
static_cast<bool>(secure) ? ". Secure" : "",
static_cast<bool>(compression) ? "" : ". Uncompressed");
if (static_cast<bool>(secure))
{
@ -107,11 +107,8 @@ void Connection::connect(const ConnectionTimeouts & timeouts)
sendHello();
receiveHello();
LOG_TRACE(log_wrapper.get(), "Connected to " << server_name
<< " server version " << server_version_major
<< "." << server_version_minor
<< "." << server_version_patch
<< ".");
LOG_TRACE(log_wrapper.get(), "Connected to {} server version {}.{}.{}.",
server_name, server_version_major, server_version_minor, server_version_patch);
}
catch (Poco::Net::NetException & e)
{
@ -132,8 +129,6 @@ void Connection::connect(const ConnectionTimeouts & timeouts)
void Connection::disconnect()
{
//LOG_TRACE(log_wrapper.get(), "Disconnecting");
in = nullptr;
last_input_packet_type.reset();
out = nullptr; // can write to socket
@ -186,8 +181,6 @@ void Connection::sendHello()
void Connection::receiveHello()
{
//LOG_TRACE(log_wrapper.get(), "Receiving hello");
/// Receive hello packet.
UInt64 packet_type = 0;
@ -391,8 +384,6 @@ void Connection::sendQuery(
query_id = query_id_;
//LOG_TRACE(log_wrapper.get(), "Sending query");
writeVarUInt(Protocol::Client::Query, *out);
writeStringBinary(query_id, *out);
@ -441,8 +432,6 @@ void Connection::sendCancel()
if (!out)
return;
//LOG_TRACE(log_wrapper.get(), "Sending cancel");
writeVarUInt(Protocol::Client::Cancel, *out);
out->next();
}
@ -450,8 +439,6 @@ void Connection::sendCancel()
void Connection::sendData(const Block & block, const String & name, bool scalar)
{
//LOG_TRACE(log_wrapper.get(), "Sending data");
if (!block_out)
{
if (compression == Protocol::Compression::Enable)
@ -516,19 +503,23 @@ void Connection::sendScalarsData(Scalars & data)
maybe_compressed_out_bytes = maybe_compressed_out->count() - maybe_compressed_out_bytes;
double elapsed = watch.elapsedSeconds();
std::stringstream msg;
msg << std::fixed << std::setprecision(3);
msg << "Sent data for " << data.size() << " scalars, total " << rows << " rows in " << elapsed << " sec., "
<< static_cast<size_t>(rows / watch.elapsedSeconds()) << " rows/sec., "
<< maybe_compressed_out_bytes / 1048576.0 << " MiB (" << maybe_compressed_out_bytes / 1048576.0 / watch.elapsedSeconds() << " MiB/sec.)";
if (compression == Protocol::Compression::Enable)
msg << ", compressed " << static_cast<double>(maybe_compressed_out_bytes) / out_bytes << " times to "
<< out_bytes / 1048576.0 << " MiB (" << out_bytes / 1048576.0 / watch.elapsedSeconds() << " MiB/sec.)";
LOG_DEBUG(log_wrapper.get(),
"Sent data for {} scalars, total {} rows in {} sec., {} rows/sec., {} ({}/sec.), compressed {} times to {} ({}/sec.)",
data.size(), rows, elapsed,
static_cast<size_t>(rows / watch.elapsedSeconds()),
formatReadableSizeWithBinarySuffix(maybe_compressed_out_bytes),
formatReadableSizeWithBinarySuffix(maybe_compressed_out_bytes / watch.elapsedSeconds()),
static_cast<double>(maybe_compressed_out_bytes) / out_bytes,
formatReadableSizeWithBinarySuffix(out_bytes),
formatReadableSizeWithBinarySuffix(out_bytes / watch.elapsedSeconds()));
else
msg << ", no compression.";
LOG_DEBUG(log_wrapper.get(), msg.rdbuf());
LOG_DEBUG(log_wrapper.get(),
"Sent data for {} scalars, total {} rows in {} sec., {} rows/sec., {} ({}/sec.), no compression.",
data.size(), rows, elapsed,
static_cast<size_t>(rows / watch.elapsedSeconds()),
formatReadableSizeWithBinarySuffix(maybe_compressed_out_bytes),
formatReadableSizeWithBinarySuffix(maybe_compressed_out_bytes / watch.elapsedSeconds()));
}
namespace
@ -616,19 +607,23 @@ void Connection::sendExternalTablesData(ExternalTablesData & data)
maybe_compressed_out_bytes = maybe_compressed_out->count() - maybe_compressed_out_bytes;
double elapsed = watch.elapsedSeconds();
std::stringstream msg;
msg << std::fixed << std::setprecision(3);
msg << "Sent data for " << data.size() << " external tables, total " << rows << " rows in " << elapsed << " sec., "
<< static_cast<size_t>(rows / watch.elapsedSeconds()) << " rows/sec., "
<< maybe_compressed_out_bytes / 1048576.0 << " MiB (" << maybe_compressed_out_bytes / 1048576.0 / watch.elapsedSeconds() << " MiB/sec.)";
if (compression == Protocol::Compression::Enable)
msg << ", compressed " << static_cast<double>(maybe_compressed_out_bytes) / out_bytes << " times to "
<< out_bytes / 1048576.0 << " MiB (" << out_bytes / 1048576.0 / watch.elapsedSeconds() << " MiB/sec.)";
LOG_DEBUG(log_wrapper.get(),
"Sent data for {} external tables, total {} rows in {} sec., {} rows/sec., {} ({}/sec.), compressed {} times to {} ({}/sec.)",
data.size(), rows, elapsed,
static_cast<size_t>(rows / watch.elapsedSeconds()),
formatReadableSizeWithBinarySuffix(maybe_compressed_out_bytes),
formatReadableSizeWithBinarySuffix(maybe_compressed_out_bytes / watch.elapsedSeconds()),
static_cast<double>(maybe_compressed_out_bytes) / out_bytes,
formatReadableSizeWithBinarySuffix(out_bytes),
formatReadableSizeWithBinarySuffix(out_bytes / watch.elapsedSeconds()));
else
msg << ", no compression.";
LOG_DEBUG(log_wrapper.get(), msg.rdbuf());
LOG_DEBUG(log_wrapper.get(),
"Sent data for {} external tables, total {} rows in {} sec., {} rows/sec., {} ({}/sec.), no compression.",
data.size(), rows, elapsed,
static_cast<size_t>(rows / watch.elapsedSeconds()),
formatReadableSizeWithBinarySuffix(maybe_compressed_out_bytes),
formatReadableSizeWithBinarySuffix(maybe_compressed_out_bytes / watch.elapsedSeconds()));
}
std::optional<Poco::Net::SocketAddress> Connection::getResolvedAddress() const
@ -682,12 +677,9 @@ Packet Connection::receivePacket()
}
else
{
//LOG_TRACE(log_wrapper.get(), "Receiving packet type");
readVarUInt(res.type, *in);
}
//LOG_TRACE(log_wrapper.get(), "Receiving packet " << res.type << " " << Protocol::Server::toString(res.type));
//std::cerr << "Client got packet: " << Protocol::Server::toString(res.type) << "\n";
switch (res.type)
{
case Protocol::Server::Data: [[fallthrough]];
@ -740,8 +732,6 @@ Packet Connection::receivePacket()
Block Connection::receiveData()
{
//LOG_TRACE(log_wrapper.get(), "Receiving data");
initBlockInput();
return receiveDataImpl(block_in);
}
@ -820,8 +810,6 @@ void Connection::setDescription()
std::unique_ptr<Exception> Connection::receiveException()
{
//LOG_TRACE(log_wrapper.get(), "Receiving exception");
return std::make_unique<Exception>(readException(*in, "Received from " + getDescription()));
}
@ -838,8 +826,6 @@ std::vector<String> Connection::receiveMultistringMessage(UInt64 msg_type)
Progress Connection::receiveProgress()
{
//LOG_TRACE(log_wrapper.get(), "Receiving progress");
Progress progress;
progress.read(*in, server_revision);
return progress;

View File

@ -50,6 +50,8 @@ class Connection;
using ConnectionPtr = std::shared_ptr<Connection>;
using Connections = std::vector<ConnectionPtr>;
using Scalars = std::map<String, Block>;
/// Packet that could be received from server.
struct Packet

View File

@ -222,8 +222,8 @@ ConnectionPoolWithFailover::tryGetEntry(
auto table_status_it = status_response.table_states_by_id.find(*table_to_check);
if (table_status_it == status_response.table_states_by_id.end())
{
fail_message = "There is no table " + table_to_check->database + "." + table_to_check->table
+ " on server: " + result.entry->getDescription();
const char * message_pattern = "There is no table {}.{} on server: {}";
fail_message = fmt::format(message_pattern, backQuote(table_to_check->database), backQuote(table_to_check->table), result.entry->getDescription());
LOG_WARNING(log, fail_message);
ProfileEvents::increment(ProfileEvents::DistributedConnectionMissingTable);
@ -248,10 +248,7 @@ ConnectionPoolWithFailover::tryGetEntry(
result.is_up_to_date = false;
result.staleness = delay;
LOG_TRACE(
log, "Server " << result.entry->getDescription() << " has unacceptable replica delay "
<< "for table " << table_to_check->database << "." << table_to_check->table
<< ": " << delay);
LOG_TRACE(log, "Server {} has unacceptable replica delay for table {}.{}: {}", result.entry->getDescription(), table_to_check->database, table_to_check->table, delay);
ProfileEvents::increment(ProfileEvents::DistributedConnectionStaleReplica);
}
}

View File

@ -35,7 +35,7 @@ TimeoutSetter::~TimeoutSetter()
catch (std::exception & e)
{
// Sometimes caught on macOS
LOG_ERROR(&Logger::get("Client"), std::string{"TimeoutSetter: Can't reset timeouts: "} + e.what());
LOG_ERROR(&Logger::get("Client"), "TimeoutSetter: Can't reset timeouts: {}", e.what());
}
}
}

View File

@ -17,6 +17,7 @@
#include <DataStreams/ColumnGathererStream.h>
#include <ext/bit_cast.h>
#include <pdqsort.h>
#include <numeric>
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
@ -117,7 +118,10 @@ namespace
/// Traits for radix sort over ValueWithIndex<T> elements:
/// elements are ordered by their `value`, and the result taken from each element is its `index`.
/// Everything not overridden here comes from RadixSortNumTraits<T>.
struct RadixSortTraits : RadixSortNumTraits<T>
{
using Element = ValueWithIndex<T>;
/// Type returned by extractResult (presumably what RadixSort writes to its output array - confirm against RadixSort).
using Result = size_t;
/// The sorting key of an element is the stored value.
static T & extractKey(Element & elem) { return elem.value; }
/// The result for an element is the index stored next to the value.
static size_t extractResult(Element & elem) { return elem.index; }
};
}
@ -179,53 +183,27 @@ void ColumnVector<T>::getPermutation(bool reverse, size_t limit, int nan_directi
for (UInt32 i = 0; i < UInt32(s); ++i)
pairs[i] = {data[i], i};
RadixSort<RadixSortTraits<T>>::executeLSD(pairs.data(), s);
RadixSort<RadixSortTraits<T>>::executeLSD(pairs.data(), s, reverse, res.data());
/// Radix sort treats all NaNs to be greater than all numbers.
/// If the user needs the opposite, we must move them accordingly.
size_t nans_to_move = 0;
if (std::is_floating_point_v<T> && nan_direction_hint < 0)
{
for (ssize_t i = s - 1; i >= 0; --i)
size_t nans_to_move = 0;
for (size_t i = 0; i < s; ++i)
{
if (isNaN(pairs[i].value))
if (isNaN(data[res[reverse ? i : s - 1 - i]]))
++nans_to_move;
else
break;
}
}
if (reverse)
{
if (nans_to_move)
{
for (size_t i = 0; i < s - nans_to_move; ++i)
res[i] = pairs[s - nans_to_move - 1 - i].index;
for (size_t i = s - nans_to_move; i < s; ++i)
res[i] = pairs[s - 1 - (i - (s - nans_to_move))].index;
}
else
{
for (size_t i = 0; i < s; ++i)
res[s - 1 - i] = pairs[i].index;
std::rotate(std::begin(res), std::begin(res) + (reverse ? nans_to_move : s - nans_to_move), std::end(res));
}
}
else
{
if (nans_to_move)
{
for (size_t i = 0; i < nans_to_move; ++i)
res[i] = pairs[i + s - nans_to_move].index;
for (size_t i = nans_to_move; i < s; ++i)
res[i] = pairs[i - nans_to_move].index;
}
else
{
for (size_t i = 0; i < s; ++i)
res[i] = pairs[i].index;
}
}
return;
}
}

View File

@ -5,6 +5,7 @@
#include <cstdlib>
#include <cstring>
#include <algorithm>
#include <sstream>
#include <iostream>
#include <functional>
#include <Poco/DOM/Text.h>
@ -303,7 +304,7 @@ void ConfigProcessor::doIncludesRecursive(
else if (throw_on_bad_incl)
throw Poco::Exception(error_msg + name);
else
LOG_WARNING(log, error_msg << name);
LOG_WARNING(log, "{}{}", error_msg, name);
}
else
{
@ -440,7 +441,7 @@ XMLDocumentPtr ConfigProcessor::processConfig(
zkutil::ZooKeeperNodeCache * zk_node_cache,
const zkutil::EventPtr & zk_changed_event)
{
LOG_DEBUG(log, "Processing configuration file '" + path + "'.");
LOG_DEBUG(log, "Processing configuration file '{}'.", path);
XMLDocumentPtr config = dom_parser.parse(path);
@ -451,7 +452,7 @@ XMLDocumentPtr ConfigProcessor::processConfig(
{
try
{
LOG_DEBUG(log, "Merging configuration file '" + merge_file + "'.");
LOG_DEBUG(log, "Merging configuration file '{}'.", merge_file);
XMLDocumentPtr with = dom_parser.parse(merge_file);
merge(config, with);
@ -488,7 +489,7 @@ XMLDocumentPtr ConfigProcessor::processConfig(
}
if (!include_from_path.empty())
{
LOG_DEBUG(log, "Including configuration file '" + include_from_path + "'.");
LOG_DEBUG(log, "Including configuration file '{}'.", include_from_path);
contributing_files.push_back(include_from_path);
include_from = dom_parser.parse(include_from_path);
@ -568,10 +569,7 @@ ConfigProcessor::LoadedConfig ConfigProcessor::loadConfigWithZooKeeperIncludes(
if (!zk_exception)
throw;
LOG_WARNING(
log,
"Error while processing from_zk config includes: " + zk_exception->message() +
". Config will be loaded from preprocessed file: " + preprocessed_path);
LOG_WARNING(log, "Error while processing from_zk config includes: {}. Config will be loaded from preprocessed file: {}", zk_exception->message(), preprocessed_path);
config_xml = dom_parser.parse(preprocessed_path);
}
@ -619,11 +617,11 @@ void ConfigProcessor::savePreprocessedConfig(const LoadedConfig & loaded_config,
Poco::File(preprocessed_path_parent).createDirectories();
}
DOMWriter().writeNode(preprocessed_path, loaded_config.preprocessed_xml);
LOG_DEBUG(log, "Saved preprocessed configuration to '" << preprocessed_path << "'.");
LOG_DEBUG(log, "Saved preprocessed configuration to '{}'.", preprocessed_path);
}
catch (Poco::Exception & e)
{
LOG_WARNING(log, "Couldn't save preprocessed config to " << preprocessed_path << ": " << e.displayText());
LOG_WARNING(log, "Couldn't save preprocessed config to {}: {}", preprocessed_path, e.displayText());
}
}

View File

@ -87,7 +87,7 @@ void ConfigReloader::reloadIfNewer(bool force, bool throw_on_error, bool fallbac
ConfigProcessor::LoadedConfig loaded_config;
try
{
LOG_DEBUG(log, "Loading config '" << path << "'");
LOG_DEBUG(log, "Loading config '{}'", path);
loaded_config = config_processor.loadConfig(/* allow_zk_includes = */ true);
if (loaded_config.has_zk_includes)

View File

@ -202,7 +202,7 @@ bool DNSResolver::updateCache()
}
if (!lost_hosts.empty())
LOG_INFO(&Logger::get("DNSResolver"), "Cached hosts not found: " << lost_hosts);
LOG_INFO(&Logger::get("DNSResolver"), "Cached hosts not found: {}", lost_hosts);
return updated;
}

View File

@ -36,7 +36,7 @@ Exception::Exception(const std::string & msg, int code)
#ifndef NDEBUG
if (code == ErrorCodes::LOGICAL_ERROR)
{
LOG_ERROR(&Poco::Logger::root(), "Logical error: '" + msg + "'.");
LOG_ERROR(&Poco::Logger::root(), "Logical error: '{}'.", msg);
assert(false);
}
#endif
@ -125,7 +125,10 @@ void tryLogCurrentException(Poco::Logger * logger, const std::string & start_of_
{
try
{
LOG_ERROR(logger, start_of_message << (start_of_message.empty() ? "" : ": ") << getCurrentExceptionMessage(true));
if (start_of_message.empty())
LOG_ERROR(logger, "{}", getCurrentExceptionMessage(true));
else
LOG_ERROR(logger, "{}: {}", start_of_message, getCurrentExceptionMessage(true));
}
catch (...)
{

Some files were not shown because too many files have changed in this diff Show More