Fixed logging for events + added hardware event and a couple of custom ones
parent f87b062f9f
commit f769a51331
@ -178,6 +178,17 @@
    M(OSReadChars, "Number of bytes read from filesystem, including page cache.") \
    M(OSWriteChars, "Number of bytes written to filesystem, including page cache.") \
    \
    M(PERF_COUNT_HW_CPU_CYCLES, "") \
    M(PERF_COUNT_HW_INSTRUCTIONS, "") \
    M(PERF_COUNT_HW_CACHE_REFERENCES, "") \
    M(PERF_COUNT_HW_CACHE_MISSES, "") \
    M(PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "") \
    M(PERF_COUNT_HW_BRANCH_MISSES, "") \
    M(PERF_COUNT_HW_BUS_CYCLES, "") \
    M(PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, "") \
    M(PERF_COUNT_HW_STALLED_CYCLES_BACKEND, "") \
    M(PERF_COUNT_HW_REF_CPU_CYCLES, "") \
    \
    M(PERF_COUNT_SW_TASK_CLOCK, "A clock count specific to the task that is running") \
    M(PERF_COUNT_SW_PAGE_FAULTS, "Number of page faults") \
    M(PERF_COUNT_SW_CONTEXT_SWITCHES, "Number of context switches") \
@ -187,6 +198,9 @@
    M(PERF_COUNT_SW_ALIGNMENT_FAULTS, "Number of alignment faults. These happen when unaligned memory accesses happen; the kernel can handle these but it reduces performance. This happens only on some architectures (never on x86).") \
    M(PERF_COUNT_SW_EMULATION_FAULTS, "Number of emulation faults. The kernel sometimes traps on unimplemented instructions and emulates them for user space. This can negatively impact performance.") \
    \
    M(PERF_CUSTOM_INSTRUCTIONS_PER_CPU_CYCLE_SCALED, "") \
    M(PERF_CUSTOM_INSTRUCTIONS_PER_CPU_CYCLE, "") \
    \
    M(CreatedHTTPConnections, "Total amount of created HTTP connections (closed or opened).") \
    \
    M(CannotWriteToWriteBufferDiscard, "Number of stack traces dropped by query profiler or signal handler because pipe is full or cannot write to pipe.") \
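Aside: each M(NAME, DOCUMENTATION) entry above becomes a ProfileEvents counter plus its description through an X-macro expansion. A minimal, hypothetical sketch of that idiom, illustrative only and not ClickHouse's exact code:

#include <cstddef>
#include <cstdio>

// One list of events, expanded twice with different definitions of M.
#define APPLY_FOR_EVENTS(M) \
    M(PERF_COUNT_HW_CPU_CYCLES, "Total CPU cycles (affected by frequency scaling).") \
    M(PERF_COUNT_HW_INSTRUCTIONS, "Retired instructions.")

// Expansion 1: an identifier per event.
enum Event : std::size_t
{
#define M(NAME, DOCUMENTATION) NAME,
    APPLY_FOR_EVENTS(M)
#undef M
    NUMBER_OF_EVENTS
};

// Expansion 2: the documentation strings, in the same order.
static const char * event_docs[] =
{
#define M(NAME, DOCUMENTATION) DOCUMENTATION,
    APPLY_FOR_EVENTS(M)
#undef M
};

int main()
{
    std::printf("%zu events; first doc: %s\n",
                static_cast<std::size_t>(NUMBER_OF_EVENTS), event_docs[PERF_COUNT_HW_CPU_CYCLES]);
    return 0;
}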
@ -12,56 +12,103 @@ namespace DB {
#if defined(__linux__)

// todo: think about event counters' overflow
// todo: ask about the usual error reporting (whether stderr is an accepted way)
static PerfEventInfo softwareEvent(int event_config, ProfileEvents::Event profile_event)
{
    return PerfEventInfo
    {
        .event_type = perf_type_id::PERF_TYPE_SOFTWARE,
        .event_config = event_config,
        .profile_event = profile_event
    };
}

static PerfEventInfo hardwareEvent(int event_config, ProfileEvents::Event profile_event)
{
    return PerfEventInfo
    {
        .event_type = perf_type_id::PERF_TYPE_HARDWARE,
        .event_config = event_config,
        .profile_event = profile_event
    };
}

// descriptions' source: http://man7.org/linux/man-pages/man2/perf_event_open.2.html
const int PerfEventsCounters::perf_event_configs[] = {
const PerfEventInfo PerfEventsCounters::perf_raw_events_info[] = {
    // Total cycles. Be wary of what happens during CPU frequency scaling.
    hardwareEvent(perf_hw_id::PERF_COUNT_HW_CPU_CYCLES, ProfileEvents::PERF_COUNT_HW_CPU_CYCLES),
    // Retired instructions. Be careful, these can be affected by various issues, most notably hardware
    // interrupt counts.
    hardwareEvent(perf_hw_id::PERF_COUNT_HW_INSTRUCTIONS, ProfileEvents::PERF_COUNT_HW_INSTRUCTIONS),
    // Cache accesses. Usually this indicates Last Level Cache accesses but this may vary depending on your CPU.
    // This may include prefetches and coherency messages; again this depends on the design of your CPU.
    hardwareEvent(perf_hw_id::PERF_COUNT_HW_CACHE_REFERENCES, ProfileEvents::PERF_COUNT_HW_CACHE_REFERENCES),
    // Cache misses. Usually this indicates Last Level Cache misses; this is intended to be used in conjunction
    // with the PERF_COUNT_HW_CACHE_REFERENCES event to calculate cache miss rates.
    hardwareEvent(perf_hw_id::PERF_COUNT_HW_CACHE_MISSES, ProfileEvents::PERF_COUNT_HW_CACHE_MISSES),
    // Retired branch instructions. Prior to Linux 2.6.35, this used the wrong event on AMD processors.
    hardwareEvent(perf_hw_id::PERF_COUNT_HW_BRANCH_INSTRUCTIONS, ProfileEvents::PERF_COUNT_HW_BRANCH_INSTRUCTIONS),
    // Mispredicted branch instructions.
    hardwareEvent(perf_hw_id::PERF_COUNT_HW_BRANCH_MISSES, ProfileEvents::PERF_COUNT_HW_BRANCH_MISSES),
    // Bus cycles, which can be different from total cycles.
    hardwareEvent(perf_hw_id::PERF_COUNT_HW_BUS_CYCLES, ProfileEvents::PERF_COUNT_HW_BUS_CYCLES),
    // Stalled cycles during issue.
    hardwareEvent(perf_hw_id::PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, ProfileEvents::PERF_COUNT_HW_STALLED_CYCLES_FRONTEND),
    // Stalled cycles during retirement.
    hardwareEvent(perf_hw_id::PERF_COUNT_HW_STALLED_CYCLES_BACKEND, ProfileEvents::PERF_COUNT_HW_STALLED_CYCLES_BACKEND),
    // Total cycles; not affected by CPU frequency scaling.
    hardwareEvent(perf_hw_id::PERF_COUNT_HW_REF_CPU_CYCLES, ProfileEvents::PERF_COUNT_HW_REF_CPU_CYCLES),

    // This reports the CPU clock, a high-resolution per-CPU timer.
    // perf_sw_ids::PERF_COUNT_SW_CPU_CLOCK,
    // a bit broken according to this: https://stackoverflow.com/a/56967896
    // makeInfo(perf_type_id::PERF_TYPE_SOFTWARE, perf_sw_ids::PERF_COUNT_SW_CPU_CLOCK, ProfileEvents::PERF_COUNT_SW_CPU_CLOCK),
    // This reports a clock count specific to the task that is running.
    perf_sw_ids::PERF_COUNT_SW_TASK_CLOCK,
    softwareEvent(perf_sw_ids::PERF_COUNT_SW_TASK_CLOCK, ProfileEvents::PERF_COUNT_SW_TASK_CLOCK),
    // This reports the number of page faults.
    perf_sw_ids::PERF_COUNT_SW_PAGE_FAULTS,
    softwareEvent(perf_sw_ids::PERF_COUNT_SW_PAGE_FAULTS, ProfileEvents::PERF_COUNT_SW_PAGE_FAULTS),
    // This counts context switches.
    // Until Linux 2.6.34, these were all reported as user-space events,
    // after that they are reported as happening in the kernel
    perf_sw_ids::PERF_COUNT_SW_CONTEXT_SWITCHES,
    softwareEvent(perf_sw_ids::PERF_COUNT_SW_CONTEXT_SWITCHES, ProfileEvents::PERF_COUNT_SW_CONTEXT_SWITCHES),
    // This reports the number of times the process has migrated to a new CPU.
    perf_sw_ids::PERF_COUNT_SW_CPU_MIGRATIONS,
    softwareEvent(perf_sw_ids::PERF_COUNT_SW_CPU_MIGRATIONS, ProfileEvents::PERF_COUNT_SW_CPU_MIGRATIONS),
    // This counts the number of minor page faults. These did not require disk I/O to handle.
    perf_sw_ids::PERF_COUNT_SW_PAGE_FAULTS_MIN,
    softwareEvent(perf_sw_ids::PERF_COUNT_SW_PAGE_FAULTS_MIN, ProfileEvents::PERF_COUNT_SW_PAGE_FAULTS_MIN),
    // This counts the number of major page faults. These required disk I/O to handle.
    perf_sw_ids::PERF_COUNT_SW_PAGE_FAULTS_MAJ,
    softwareEvent(perf_sw_ids::PERF_COUNT_SW_PAGE_FAULTS_MAJ, ProfileEvents::PERF_COUNT_SW_PAGE_FAULTS_MAJ),
    // This counts the number of alignment faults. These happen when unaligned memory accesses happen;
    // the kernel can handle these but it reduces performance.
    // This happens only on some architectures (never on x86).
    perf_sw_ids::PERF_COUNT_SW_ALIGNMENT_FAULTS,
    softwareEvent(perf_sw_ids::PERF_COUNT_SW_ALIGNMENT_FAULTS, ProfileEvents::PERF_COUNT_SW_ALIGNMENT_FAULTS),
    // This counts the number of emulation faults. The kernel sometimes traps on unimplemented instructions and
    // emulates them for user space. This can negatively impact performance.
    perf_sw_ids::PERF_COUNT_SW_EMULATION_FAULTS
    softwareEvent(perf_sw_ids::PERF_COUNT_SW_EMULATION_FAULTS, ProfileEvents::PERF_COUNT_SW_EMULATION_FAULTS)
    // This is a placeholder event that counts nothing. Informational sample record types such as mmap or
    // comm must be associated with an active event. This dummy event allows gathering such records
    // without requiring a counting event.
    // perf_sw_ids::PERF_COUNT_SW_DUMMY
    // softwareEventInfo(perf_sw_ids::PERF_COUNT_SW_DUMMY, ProfileEvents::PERF_COUNT_SW_DUMMY)
};

const ProfileEvents::Event PerfEventsCounters::perf_events[] = {
    // a bit broken according to this: https://stackoverflow.com/a/56967896
    // ProfileEvents::PERF_COUNT_SW_CPU_CLOCK,
    ProfileEvents::PERF_COUNT_SW_TASK_CLOCK,
    ProfileEvents::PERF_COUNT_SW_PAGE_FAULTS,
    ProfileEvents::PERF_COUNT_SW_CONTEXT_SWITCHES,
    ProfileEvents::PERF_COUNT_SW_CPU_MIGRATIONS,
    ProfileEvents::PERF_COUNT_SW_PAGE_FAULTS_MIN,
    ProfileEvents::PERF_COUNT_SW_PAGE_FAULTS_MAJ,
    ProfileEvents::PERF_COUNT_SW_ALIGNMENT_FAULTS,
    ProfileEvents::PERF_COUNT_SW_EMULATION_FAULTS
    // ProfileEvents::PERF_COUNT_SW_DUMMY,
};
static_assert(std::size(PerfEventsCounters::perf_raw_events_info) == PerfEventsCounters::NUMBER_OF_RAW_EVENTS);

static_assert(std::size(PerfEventsCounters::perf_event_configs) == PerfEventsCounters::NUMBER_OF_EVENTS);
static_assert(std::size(PerfEventsCounters::perf_events) == PerfEventsCounters::NUMBER_OF_EVENTS);
const std::map<int, int> PerfEventsCounters::event_config_to_info_index = [] {
    std::map<int, int> map;
    for (size_t i = 0; i < PerfEventsCounters::NUMBER_OF_RAW_EVENTS; ++i)
        map.emplace(PerfEventsCounters::perf_raw_events_info[i].event_config, i);
    return map;
} ();

std::atomic<bool> PerfEventsCounters::events_availability_logged = false;

Logger * PerfEventsCounters::getLogger()
{
    return &Logger::get("PerfEventsCounters");
}

long long PerfEventsCounters::getRawValue(int event_config)
{
    return raw_event_values[event_config_to_info_index.find(event_config)->second];
}

static int openPerfEvent(perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags) {
    return static_cast<int>(syscall(SYS_perf_event_open, hw_event, pid, cpu, group_fd, flags));
@ -89,7 +136,7 @@ namespace DB {
    return true;
}

static void perfEventStart(int perf_event_paranoid, int perf_event_type, int perf_event_config, int &event_file_descriptor) {
static void perfEventOpenDisabled(int perf_event_paranoid, int perf_event_type, int perf_event_config, int &event_file_descriptor) {
    perf_event_attr pe = perf_event_attr();
    pe.type = perf_event_type;
    pe.size = sizeof(struct perf_event_attr);
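For context on what perfEventOpenDisabled wraps: per the perf_event_open(2) man page, the flow is to fill a zeroed perf_event_attr, open the counter disabled, enable it with ioctl, and later read one 64-bit value from the file descriptor. A self-contained sketch under those assumptions; the variable names are illustrative, not taken from the patch:

#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <cstdio>
#include <cstring>

int main()
{
    perf_event_attr attr;
    std::memset(&attr, 0, sizeof(attr));
    attr.type = PERF_TYPE_HARDWARE;
    attr.size = sizeof(attr);
    attr.config = PERF_COUNT_HW_INSTRUCTIONS;
    attr.disabled = 1;        // open the counter stopped, like perfEventOpenDisabled does
    attr.exclude_kernel = 1;  // count user space only
    attr.exclude_hv = 1;

    // pid = 0, cpu = -1: measure the calling thread on any CPU; no group, no flags.
    int fd = static_cast<int>(syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0));
    if (fd == -1)
    {
        std::perror("perf_event_open");
        return 1;
    }

    ioctl(fd, PERF_EVENT_IOC_RESET, 0);
    ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);

    volatile long long sink = 0;
    for (int i = 0; i < 1000000; ++i)   // some work to count
        sink += i;

    ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

    long long count = 0;
    read(fd, &count, sizeof(count));    // a plain counter yields a single 64-bit value
    std::printf("instructions: %lld\n", count);

    close(fd);
    return 0;
}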
@ -113,19 +160,27 @@ namespace DB {
    if (!is_pref_available)
        return;

    for (size_t i = 0; i < NUMBER_OF_EVENTS; ++i)
    bool expected = false;
    bool log_unsupported_event = events_availability_logged.compare_exchange_strong(expected, true);
    for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i)
    {
        perfEventStart(perf_event_paranoid, perf_type_id::PERF_TYPE_SOFTWARE, perf_event_configs[i], counters.events_descriptors[i]);
        counters.raw_event_values[i] = 0;
        const PerfEventInfo & event_info = perf_raw_events_info[i];
        int & fd = counters.events_descriptors[i];
        perfEventOpenDisabled(perf_event_paranoid, event_info.event_type, event_info.event_config, fd);

        if (fd == -1 && log_unsupported_event)
        {
            LOG_WARNING(
                getLogger(),
                "Perf event is unsupported: event_type=" << event_info.event_type
                    << ", event_config=" << event_info.event_config);
        }
    }

    for (size_t i = 0; i < NUMBER_OF_EVENTS; ++i)
    for (int fd : counters.events_descriptors)
    {
        int fd = counters.events_descriptors[i];
        if (fd == -1) {
            fprintf(stderr, "Event config %d is unsupported\n", perf_event_configs[i]);
            continue;
        }

        if (fd != -1)
            ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
    }
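The expected/compare_exchange_strong pair above is the usual "only the first thread logs" idiom; a tiny stand-alone sketch of it, with hypothetical names:

#include <atomic>
#include <cstdio>

static std::atomic<bool> already_logged{false};

// Only the first caller, across all threads, wins the exchange and prints.
void warnOnce(const char * message)
{
    bool expected = false;
    if (already_logged.compare_exchange_strong(expected, true))
        std::fprintf(stderr, "%s\n", message);
}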
@ -136,26 +191,48 @@ namespace DB {
    if (!counters.perf_events_recording)
        return;

    for (size_t i = 0; i < NUMBER_OF_EVENTS; ++i) {
    // process raw events

    // only read counters here to have as little overhead for processing as possible
    for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i)
    {
        int fd = counters.events_descriptors[i];
        if (fd != -1)
            read(fd, &counters.raw_event_values[i], sizeof(long long));
    }

    // actually process counters' values and release resources
    for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i)
    {
        int & fd = counters.events_descriptors[i];
        if (fd == -1)
            continue;

        long long count;
        read(fd, &count, sizeof(count));

        profile_events.increment(perf_events[i], static_cast<ProfileEvents::Count>(count));
        // printf("%s: %lld\n", perf_event_names[i].c_str(), count);
        profile_events.increment(perf_raw_events_info[i].profile_event, counters.raw_event_values[i]);

        if (ioctl(fd, PERF_EVENT_IOC_DISABLE, 0))
            fprintf(stderr, "Can't disable perf event with file descriptor: %d\n", fd);
            LOG_WARNING(getLogger(), "Can't disable perf event with file descriptor: " << fd);

        if (close(fd))
            fprintf(stderr, "Can't close perf event file descriptor: %d; error: %d - %s\n", fd, errno, strerror(errno));
            LOG_WARNING(getLogger(), "Can't close perf event file descriptor: " << fd << "; error: " << errno << " - " << strerror(errno));

        fd = -1;
    }

    // process custom events which depend on the raw ones
    long long hw_cpu_cycles = counters.getRawValue(perf_hw_id::PERF_COUNT_HW_CPU_CYCLES);
    long long hw_ref_cpu_cycles = counters.getRawValue(perf_hw_id::PERF_COUNT_HW_REF_CPU_CYCLES);

    long long instructions_per_cpu_scaled = hw_cpu_cycles != 0
        ? counters.getRawValue(perf_hw_id::PERF_COUNT_HW_INSTRUCTIONS) / hw_cpu_cycles
        : 0;
    long long instructions_per_cpu = hw_ref_cpu_cycles != 0
        ? counters.getRawValue(perf_hw_id::PERF_COUNT_HW_INSTRUCTIONS) / hw_ref_cpu_cycles
        : 0;

    profile_events.increment(ProfileEvents::PERF_CUSTOM_INSTRUCTIONS_PER_CPU_CYCLE_SCALED, instructions_per_cpu_scaled);
    profile_events.increment(ProfileEvents::PERF_CUSTOM_INSTRUCTIONS_PER_CPU_CYCLE, instructions_per_cpu);

    counters.perf_events_recording = false;
}
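A note on the custom metrics computed above: the division is done in long long, so an instructions-per-cycle ratio below 1 truncates to 0 and anything above it to a small integer. If finer granularity were wanted, the ratio could be scaled before dividing; a hypothetical sketch, not part of the patch:

#include <cstdint>
#include <cstdio>

// Report instructions per 1000 cycles so typical IPC values (roughly 0.25 .. 4)
// survive storage in an integer counter.
static int64_t instructionsPerKiloCycle(int64_t instructions, int64_t cycles)
{
    return cycles != 0 ? instructions * 1000 / cycles : 0;
}

int main()
{
    // 1.2e9 instructions over 1.5e9 cycles -> IPC 0.8 -> 800 per kilo-cycle.
    std::printf("%lld\n", static_cast<long long>(instructionsPerKiloCycle(1200000000LL, 1500000000LL)));
    return 0;
}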
@ -5,6 +5,8 @@
#include <ctime>
#include <sys/resource.h>
#include <pthread.h>
#include <map>
#include <common/logger_useful.h>

#if defined(__linux__)
#include <linux/taskstats.h>
@ -35,6 +37,17 @@ namespace ProfileEvents
    extern const Event OSReadBytes;
    extern const Event OSWriteBytes;

    extern const Event PERF_COUNT_HW_CPU_CYCLES;
    extern const Event PERF_COUNT_HW_INSTRUCTIONS;
    extern const Event PERF_COUNT_HW_CACHE_REFERENCES;
    extern const Event PERF_COUNT_HW_CACHE_MISSES;
    extern const Event PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
    extern const Event PERF_COUNT_HW_BRANCH_MISSES;
    extern const Event PERF_COUNT_HW_BUS_CYCLES;
    extern const Event PERF_COUNT_HW_STALLED_CYCLES_FRONTEND;
    extern const Event PERF_COUNT_HW_STALLED_CYCLES_BACKEND;
    extern const Event PERF_COUNT_HW_REF_CPU_CYCLES;

    // extern const Event PERF_COUNT_SW_CPU_CLOCK;
    extern const Event PERF_COUNT_SW_TASK_CLOCK;
    extern const Event PERF_COUNT_SW_PAGE_FAULTS;
@ -44,6 +57,9 @@
    extern const Event PERF_COUNT_SW_PAGE_FAULTS_MAJ;
    extern const Event PERF_COUNT_SW_ALIGNMENT_FAULTS;
    extern const Event PERF_COUNT_SW_EMULATION_FAULTS;

    extern const Event PERF_CUSTOM_INSTRUCTIONS_PER_CPU_CYCLE_SCALED;
    extern const Event PERF_CUSTOM_INSTRUCTIONS_PER_CPU_CYCLE;
#endif
}
@ -127,6 +143,18 @@ struct RUsageCounters
    }
};

#if defined(__linux__)

struct PerfEventInfo {
    // see perf_event.h/perf_type_id enum
    int event_type;
    // see configs in perf_event.h
    int event_config;
    ProfileEvents::Event profile_event;
};

#endif

struct PerfEventsCounters
{
    // cat /proc/sys/kernel/perf_event_paranoid - if perf_event_paranoid is set to 3, all calls to `perf_event_open` are rejected (even for the current process)
@ -143,18 +171,28 @@ struct PerfEventsCounters
    // todo: check whether perf_event_open() is available with CAP_SYS_ADMIN

#if defined(__linux__)
    static constexpr size_t NUMBER_OF_EVENTS = 8;
    static constexpr size_t NUMBER_OF_RAW_EVENTS = 18;

    static const int perf_event_configs[];
    static const ProfileEvents::Event perf_events[];
    static const PerfEventInfo perf_raw_events_info[];
    static const std::map<int, int> event_config_to_info_index;

    int events_descriptors[NUMBER_OF_EVENTS];
    int events_descriptors[NUMBER_OF_RAW_EVENTS];
    // temp array just to not create it each time event processing finishes
    long long raw_event_values[NUMBER_OF_RAW_EVENTS];
    bool perf_events_recording = false;
#endif

    static void initializeProfileEvents(PerfEventsCounters & counters);

    static void finalizeProfileEvents(PerfEventsCounters & counters, ProfileEvents::Counters & profile_events);

private:
    // used to write information about perf event availability only once for all threads
    static std::atomic<bool> events_availability_logged;

    static Logger * getLogger();

    long long getRawValue(int event_config);
};

#if defined(__linux__)