mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-10-18 06:20:51 +00:00
Fixed logging for events + added hardware event and a couple of custom ones
This commit is contained in:
parent
f87b062f9f
commit
f769a51331
@ -178,6 +178,17 @@
|
|||||||
M(OSReadChars, "Number of bytes read from filesystem, including page cache.") \
|
M(OSReadChars, "Number of bytes read from filesystem, including page cache.") \
|
||||||
M(OSWriteChars, "Number of bytes written to filesystem, including page cache.") \
|
M(OSWriteChars, "Number of bytes written to filesystem, including page cache.") \
|
||||||
\
|
\
|
||||||
|
M(PERF_COUNT_HW_CPU_CYCLES, "") \
|
||||||
|
M(PERF_COUNT_HW_INSTRUCTIONS, "") \
|
||||||
|
M(PERF_COUNT_HW_CACHE_REFERENCES, "") \
|
||||||
|
M(PERF_COUNT_HW_CACHE_MISSES, "") \
|
||||||
|
M(PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "") \
|
||||||
|
M(PERF_COUNT_HW_BRANCH_MISSES, "") \
|
||||||
|
M(PERF_COUNT_HW_BUS_CYCLES, "") \
|
||||||
|
M(PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, "") \
|
||||||
|
M(PERF_COUNT_HW_STALLED_CYCLES_BACKEND, "") \
|
||||||
|
M(PERF_COUNT_HW_REF_CPU_CYCLES, "") \
|
||||||
|
\
|
||||||
M(PERF_COUNT_SW_TASK_CLOCK, "A clock count specific to the task that is running") \
|
M(PERF_COUNT_SW_TASK_CLOCK, "A clock count specific to the task that is running") \
|
||||||
M(PERF_COUNT_SW_PAGE_FAULTS, "Number of page faults") \
|
M(PERF_COUNT_SW_PAGE_FAULTS, "Number of page faults") \
|
||||||
M(PERF_COUNT_SW_CONTEXT_SWITCHES, "Number of context switches") \
|
M(PERF_COUNT_SW_CONTEXT_SWITCHES, "Number of context switches") \
|
||||||
@ -187,6 +198,9 @@
|
|||||||
M(PERF_COUNT_SW_ALIGNMENT_FAULTS, "Number of alignment faults. These happen when unaligned memory accesses happen; the kernel can handle these but it reduces performance. This happens only on some architectures (never on x86).") \
|
M(PERF_COUNT_SW_ALIGNMENT_FAULTS, "Number of alignment faults. These happen when unaligned memory accesses happen; the kernel can handle these but it reduces performance. This happens only on some architectures (never on x86).") \
|
||||||
M(PERF_COUNT_SW_EMULATION_FAULTS, "Number of emulation faults. The kernel sometimes traps on unimplemented instructions and emulates them for user space. This can negatively impact performance.") \
|
M(PERF_COUNT_SW_EMULATION_FAULTS, "Number of emulation faults. The kernel sometimes traps on unimplemented instructions and emulates them for user space. This can negatively impact performance.") \
|
||||||
\
|
\
|
||||||
|
M(PERF_CUSTOM_INSTRUCTIONS_PER_CPU_CYCLE_SCALED, "") \
|
||||||
|
M(PERF_CUSTOM_INSTRUCTIONS_PER_CPU_CYCLE, "") \
|
||||||
|
\
|
||||||
M(CreatedHTTPConnections, "Total amount of created HTTP connections (closed or opened).") \
|
M(CreatedHTTPConnections, "Total amount of created HTTP connections (closed or opened).") \
|
||||||
\
|
\
|
||||||
M(CannotWriteToWriteBufferDiscard, "Number of stack traces dropped by query profiler or signal handler because pipe is full or cannot write to pipe.") \
|
M(CannotWriteToWriteBufferDiscard, "Number of stack traces dropped by query profiler or signal handler because pipe is full or cannot write to pipe.") \
|
||||||
|
@ -12,56 +12,103 @@ namespace DB {
|
|||||||
|
|
||||||
#if defined(__linux__)
|
#if defined(__linux__)
|
||||||
|
|
||||||
// todo: think about event counters' overflow
|
static PerfEventInfo softwareEvent(int event_config, ProfileEvents::Event profile_event)
|
||||||
// todo: ask about the usual error reporting (whether stderr is an accepted way)
|
{
|
||||||
|
return PerfEventInfo
|
||||||
|
{
|
||||||
|
.event_type = perf_type_id::PERF_TYPE_SOFTWARE,
|
||||||
|
.event_config = event_config,
|
||||||
|
.profile_event = profile_event
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
static PerfEventInfo hardwareEvent(int event_config, ProfileEvents::Event profile_event)
|
||||||
|
{
|
||||||
|
return PerfEventInfo
|
||||||
|
{
|
||||||
|
.event_type = perf_type_id::PERF_TYPE_HARDWARE,
|
||||||
|
.event_config = event_config,
|
||||||
|
.profile_event = profile_event
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
// descriptions' source: http://man7.org/linux/man-pages/man2/perf_event_open.2.html
|
// descriptions' source: http://man7.org/linux/man-pages/man2/perf_event_open.2.html
|
||||||
const int PerfEventsCounters::perf_event_configs[] = {
|
const PerfEventInfo PerfEventsCounters::perf_raw_events_info[] = {
|
||||||
|
|
||||||
|
// Total cycles. Be wary of what happens during CPU frequency scaling.
|
||||||
|
hardwareEvent(perf_hw_id::PERF_COUNT_HW_CPU_CYCLES, ProfileEvents::PERF_COUNT_HW_CPU_CYCLES),
|
||||||
|
// Retired instructions. Be careful, these can be affected by various issues, most notably hardware
|
||||||
|
// interrupt counts.
|
||||||
|
hardwareEvent(perf_hw_id::PERF_COUNT_HW_INSTRUCTIONS, ProfileEvents::PERF_COUNT_HW_INSTRUCTIONS),
|
||||||
|
// Cache accesses. Usually this indicates Last Level Cache accesses but this may vary depending on your CPU.
|
||||||
|
// This may include prefetches and coherency messages; again this depends on the design of your CPU.
|
||||||
|
hardwareEvent(perf_hw_id::PERF_COUNT_HW_CACHE_REFERENCES, ProfileEvents::PERF_COUNT_HW_CACHE_REFERENCES),
|
||||||
|
// Cache misses. Usually this indicates Last Level Cache misses; this is intended to be used in con‐junction
|
||||||
|
// with the PERF_COUNT_HW_CACHE_REFERENCES event to calculate cache miss rates.
|
||||||
|
hardwareEvent(perf_hw_id::PERF_COUNT_HW_CACHE_MISSES, ProfileEvents::PERF_COUNT_HW_CACHE_MISSES),
|
||||||
|
// Retired branch instructions. Prior to Linux 2.6.35, this used the wrong event on AMD processors.
|
||||||
|
hardwareEvent(perf_hw_id::PERF_COUNT_HW_BRANCH_INSTRUCTIONS, ProfileEvents::PERF_COUNT_HW_BRANCH_INSTRUCTIONS),
|
||||||
|
// Mispredicted branch instructions.
|
||||||
|
hardwareEvent(perf_hw_id::PERF_COUNT_HW_BRANCH_MISSES, ProfileEvents::PERF_COUNT_HW_BRANCH_MISSES),
|
||||||
|
// Bus cycles, which can be different from total cycles.
|
||||||
|
hardwareEvent(perf_hw_id::PERF_COUNT_HW_BUS_CYCLES, ProfileEvents::PERF_COUNT_HW_BUS_CYCLES),
|
||||||
|
// Stalled cycles during issue.
|
||||||
|
hardwareEvent(perf_hw_id::PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, ProfileEvents::PERF_COUNT_HW_STALLED_CYCLES_FRONTEND),
|
||||||
|
// Stalled cycles during retirement.
|
||||||
|
hardwareEvent(perf_hw_id::PERF_COUNT_HW_STALLED_CYCLES_BACKEND, ProfileEvents::PERF_COUNT_HW_STALLED_CYCLES_BACKEND),
|
||||||
|
// Total cycles; not affected by CPU frequency scaling.
|
||||||
|
hardwareEvent(perf_hw_id::PERF_COUNT_HW_REF_CPU_CYCLES, ProfileEvents::PERF_COUNT_HW_REF_CPU_CYCLES),
|
||||||
|
|
||||||
// This reports the CPU clock, a high-resolution per-CPU timer.
|
// This reports the CPU clock, a high-resolution per-CPU timer.
|
||||||
// perf_sw_ids::PERF_COUNT_SW_CPU_CLOCK,
|
// a bit broken according to this: https://stackoverflow.com/a/56967896
|
||||||
|
// makeInfo(perf_type_id::PERF_TYPE_SOFTWARE, perf_sw_ids::PERF_COUNT_SW_CPU_CLOCK, ProfileEvents::PERF_COUNT_SW_CPU_CLOCK),
|
||||||
// This reports a clock count specific to the task that is running.
|
// This reports a clock count specific to the task that is running.
|
||||||
perf_sw_ids::PERF_COUNT_SW_TASK_CLOCK,
|
softwareEvent(perf_sw_ids::PERF_COUNT_SW_TASK_CLOCK, ProfileEvents::PERF_COUNT_SW_TASK_CLOCK),
|
||||||
// This reports the number of page faults.
|
// This reports the number of page faults.
|
||||||
perf_sw_ids::PERF_COUNT_SW_PAGE_FAULTS,
|
softwareEvent(perf_sw_ids::PERF_COUNT_SW_PAGE_FAULTS, ProfileEvents::PERF_COUNT_SW_PAGE_FAULTS),
|
||||||
// This counts context switches.
|
// This counts context switches.
|
||||||
// Until Linux 2.6.34, these were all reported as user-space events,
|
// Until Linux 2.6.34, these were all reported as user-space events,
|
||||||
// after that they are reported as happening in the kernel
|
// after that they are reported as happening in the kernel
|
||||||
perf_sw_ids::PERF_COUNT_SW_CONTEXT_SWITCHES,
|
softwareEvent(perf_sw_ids::PERF_COUNT_SW_CONTEXT_SWITCHES, ProfileEvents::PERF_COUNT_SW_CONTEXT_SWITCHES),
|
||||||
// This reports the number of times the process has migrated to a new CPU.
|
// This reports the number of times the process has migrated to a new CPU.
|
||||||
perf_sw_ids::PERF_COUNT_SW_CPU_MIGRATIONS,
|
softwareEvent(perf_sw_ids::PERF_COUNT_SW_CPU_MIGRATIONS, ProfileEvents::PERF_COUNT_SW_CPU_MIGRATIONS),
|
||||||
// This counts the number of minor page faults. These did not require disk I/O to handle.
|
// This counts the number of minor page faults. These did not require disk I/O to handle.
|
||||||
perf_sw_ids::PERF_COUNT_SW_PAGE_FAULTS_MIN,
|
softwareEvent(perf_sw_ids::PERF_COUNT_SW_PAGE_FAULTS_MIN, ProfileEvents::PERF_COUNT_SW_PAGE_FAULTS_MIN),
|
||||||
// This counts the number of major page faults. These required disk I/O to handle.
|
// This counts the number of major page faults. These required disk I/O to handle.
|
||||||
perf_sw_ids::PERF_COUNT_SW_PAGE_FAULTS_MAJ,
|
softwareEvent(perf_sw_ids::PERF_COUNT_SW_PAGE_FAULTS_MAJ, ProfileEvents::PERF_COUNT_SW_PAGE_FAULTS_MAJ),
|
||||||
// This counts the number of alignment faults. These happen when unaligned memory accesses happen;
|
// This counts the number of alignment faults. These happen when unaligned memory accesses happen;
|
||||||
// the kernel can handle these but it reduces performance.
|
// the kernel can handle these but it reduces performance.
|
||||||
// This happens only on some architectures (never on x86).
|
// This happens only on some architectures (never on x86).
|
||||||
perf_sw_ids::PERF_COUNT_SW_ALIGNMENT_FAULTS,
|
softwareEvent(perf_sw_ids::PERF_COUNT_SW_ALIGNMENT_FAULTS, ProfileEvents::PERF_COUNT_SW_ALIGNMENT_FAULTS),
|
||||||
// This counts the number of emulation faults. The kernel sometimes traps on unimplemented instructions and
|
// This counts the number of emulation faults. The kernel sometimes traps on unimplemented instructions and
|
||||||
// emulates them for user space. This can negatively impact performance.
|
// emulates them for user space. This can negatively impact performance.
|
||||||
perf_sw_ids::PERF_COUNT_SW_EMULATION_FAULTS
|
softwareEvent(perf_sw_ids::PERF_COUNT_SW_EMULATION_FAULTS, ProfileEvents::PERF_COUNT_SW_EMULATION_FAULTS)
|
||||||
// This is a placeholder event that counts nothing. Informational sample record types such as mmap or
|
// This is a placeholder event that counts nothing. Informational sample record types such as mmap or
|
||||||
// comm must be associated with an active event. This dummy event allows gathering such records
|
// comm must be associated with an active event. This dummy event allows gathering such records
|
||||||
// without requiring a counting event.
|
// without requiring a counting event.
|
||||||
// perf_sw_ids::PERF_COUNT_SW_DUMMY
|
// softwareEventInfo(perf_sw_ids::PERF_COUNT_SW_DUMMY, ProfileEvents::PERF_COUNT_SW_DUMMY)
|
||||||
};
|
};
|
||||||
|
|
||||||
const ProfileEvents::Event PerfEventsCounters::perf_events[] = {
|
static_assert(std::size(PerfEventsCounters::perf_raw_events_info) == PerfEventsCounters::NUMBER_OF_RAW_EVENTS);
|
||||||
// a bit broken according to this: https://stackoverflow.com/a/56967896
|
|
||||||
// ProfileEvents::PERF_COUNT_SW_CPU_CLOCK,
|
|
||||||
ProfileEvents::PERF_COUNT_SW_TASK_CLOCK,
|
|
||||||
ProfileEvents::PERF_COUNT_SW_PAGE_FAULTS,
|
|
||||||
ProfileEvents::PERF_COUNT_SW_CONTEXT_SWITCHES,
|
|
||||||
ProfileEvents::PERF_COUNT_SW_CPU_MIGRATIONS,
|
|
||||||
ProfileEvents::PERF_COUNT_SW_PAGE_FAULTS_MIN,
|
|
||||||
ProfileEvents::PERF_COUNT_SW_PAGE_FAULTS_MAJ,
|
|
||||||
ProfileEvents::PERF_COUNT_SW_ALIGNMENT_FAULTS,
|
|
||||||
ProfileEvents::PERF_COUNT_SW_EMULATION_FAULTS
|
|
||||||
// ProfileEvents::PERF_COUNT_SW_DUMMY,
|
|
||||||
};
|
|
||||||
|
|
||||||
static_assert(std::size(PerfEventsCounters::perf_event_configs) == PerfEventsCounters::NUMBER_OF_EVENTS);
|
const std::map<int, int> PerfEventsCounters::event_config_to_info_index = [] {
|
||||||
static_assert(std::size(PerfEventsCounters::perf_events) == PerfEventsCounters::NUMBER_OF_EVENTS);
|
std::map<int, int> map;
|
||||||
|
for (size_t i = 0; i < PerfEventsCounters::NUMBER_OF_RAW_EVENTS; ++i)
|
||||||
|
map.emplace(PerfEventsCounters::perf_raw_events_info[i].event_config, i);
|
||||||
|
return map;
|
||||||
|
} ();
|
||||||
|
|
||||||
|
std::atomic<bool> PerfEventsCounters::events_availability_logged = false;
|
||||||
|
|
||||||
|
Logger * PerfEventsCounters::getLogger()
|
||||||
|
{
|
||||||
|
return &Logger::get("PerfEventsCounters");
|
||||||
|
}
|
||||||
|
|
||||||
|
long long PerfEventsCounters::getRawValue(int event_config)
|
||||||
|
{
|
||||||
|
return raw_event_values[event_config_to_info_index.find(event_config)->second];
|
||||||
|
}
|
||||||
|
|
||||||
static int openPerfEvent(perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags) {
|
static int openPerfEvent(perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags) {
|
||||||
return static_cast<int>(syscall(SYS_perf_event_open, hw_event, pid, cpu, group_fd, flags));
|
return static_cast<int>(syscall(SYS_perf_event_open, hw_event, pid, cpu, group_fd, flags));
|
||||||
@ -89,7 +136,7 @@ namespace DB {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void perfEventStart(int perf_event_paranoid, int perf_event_type, int perf_event_config, int &event_file_descriptor) {
|
static void perfEventOpenDisabled(int perf_event_paranoid, int perf_event_type, int perf_event_config, int &event_file_descriptor) {
|
||||||
perf_event_attr pe = perf_event_attr();
|
perf_event_attr pe = perf_event_attr();
|
||||||
pe.type = perf_event_type;
|
pe.type = perf_event_type;
|
||||||
pe.size = sizeof(struct perf_event_attr);
|
pe.size = sizeof(struct perf_event_attr);
|
||||||
@ -113,20 +160,28 @@ namespace DB {
|
|||||||
if (!is_pref_available)
|
if (!is_pref_available)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
for (size_t i = 0; i < NUMBER_OF_EVENTS; ++i)
|
bool expected = false;
|
||||||
|
bool log_unsupported_event = events_availability_logged.compare_exchange_strong(expected, true);
|
||||||
|
for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i)
|
||||||
{
|
{
|
||||||
perfEventStart(perf_event_paranoid, perf_type_id::PERF_TYPE_SOFTWARE, perf_event_configs[i], counters.events_descriptors[i]);
|
counters.raw_event_values[i] = 0;
|
||||||
|
const PerfEventInfo & event_info = perf_raw_events_info[i];
|
||||||
|
int & fd = counters.events_descriptors[i];
|
||||||
|
perfEventOpenDisabled(perf_event_paranoid, event_info.event_type, event_info.event_config, fd);
|
||||||
|
|
||||||
|
if (fd == -1 && log_unsupported_event)
|
||||||
|
{
|
||||||
|
LOG_WARNING(
|
||||||
|
getLogger(),
|
||||||
|
"Perf event is unsupported: event_type=" << event_info.event_type
|
||||||
|
<< ", event_config=" << event_info.event_config);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (size_t i = 0; i < NUMBER_OF_EVENTS; ++i)
|
for (int fd : counters.events_descriptors)
|
||||||
{
|
{
|
||||||
int fd = counters.events_descriptors[i];
|
if (fd != -1)
|
||||||
if (fd == -1) {
|
ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
|
||||||
fprintf(stderr, "Event config %d is unsupported\n", perf_event_configs[i]);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
counters.perf_events_recording = true;
|
counters.perf_events_recording = true;
|
||||||
@ -136,26 +191,48 @@ namespace DB {
|
|||||||
if (!counters.perf_events_recording)
|
if (!counters.perf_events_recording)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
for (size_t i = 0; i < NUMBER_OF_EVENTS; ++i) {
|
// process raw events
|
||||||
|
|
||||||
|
// only read counters here to have as little overhead for processing as possible
|
||||||
|
for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i)
|
||||||
|
{
|
||||||
|
int fd = counters.events_descriptors[i];
|
||||||
|
if (fd != -1)
|
||||||
|
read(fd, &counters.raw_event_values[i], sizeof(long long));
|
||||||
|
}
|
||||||
|
|
||||||
|
// actually process counters' values and release resources
|
||||||
|
for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i)
|
||||||
|
{
|
||||||
int & fd = counters.events_descriptors[i];
|
int & fd = counters.events_descriptors[i];
|
||||||
if (fd == -1)
|
if (fd == -1)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
long long count;
|
profile_events.increment(perf_raw_events_info[i].profile_event, counters.raw_event_values[i]);
|
||||||
read(fd, &count, sizeof(count));
|
|
||||||
|
|
||||||
profile_events.increment(perf_events[i], static_cast<ProfileEvents::Count>(count));
|
|
||||||
// printf("%s: %lld\n", perf_event_names[i].c_str(), count);
|
|
||||||
|
|
||||||
if (ioctl(fd, PERF_EVENT_IOC_DISABLE, 0))
|
if (ioctl(fd, PERF_EVENT_IOC_DISABLE, 0))
|
||||||
fprintf(stderr, "Can't disable perf event with file descriptor: %d\n", fd);
|
LOG_WARNING(getLogger(), "Can't disable perf event with file descriptor: " << fd);
|
||||||
|
|
||||||
if (close(fd))
|
if (close(fd))
|
||||||
fprintf(stderr, "Can't close perf event file descriptor: %d; error: %d - %s\n", fd, errno, strerror(errno));
|
LOG_WARNING(getLogger(), "Can't close perf event file descriptor: " << fd << "; error: " << errno << " - " << strerror(errno));
|
||||||
|
|
||||||
fd = -1;
|
fd = -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// process custom events which depend on the raw ones
|
||||||
|
long long hw_cpu_cycles = counters.getRawValue(perf_hw_id::PERF_COUNT_HW_CPU_CYCLES);
|
||||||
|
long long hw_ref_cpu_cycles = counters.getRawValue(perf_hw_id::PERF_COUNT_HW_REF_CPU_CYCLES);
|
||||||
|
|
||||||
|
long long instructions_per_cpu_scaled = hw_cpu_cycles != 0
|
||||||
|
? counters.getRawValue(perf_hw_id::PERF_COUNT_HW_INSTRUCTIONS) / hw_cpu_cycles
|
||||||
|
: 0;
|
||||||
|
long long instructions_per_cpu = hw_ref_cpu_cycles != 0
|
||||||
|
? counters.getRawValue(perf_hw_id::PERF_COUNT_HW_INSTRUCTIONS) / hw_ref_cpu_cycles
|
||||||
|
: 0;
|
||||||
|
|
||||||
|
profile_events.increment(ProfileEvents::PERF_CUSTOM_INSTRUCTIONS_PER_CPU_CYCLE_SCALED, instructions_per_cpu_scaled);
|
||||||
|
profile_events.increment(ProfileEvents::PERF_CUSTOM_INSTRUCTIONS_PER_CPU_CYCLE, instructions_per_cpu);
|
||||||
|
|
||||||
counters.perf_events_recording = false;
|
counters.perf_events_recording = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5,6 +5,8 @@
|
|||||||
#include <ctime>
|
#include <ctime>
|
||||||
#include <sys/resource.h>
|
#include <sys/resource.h>
|
||||||
#include <pthread.h>
|
#include <pthread.h>
|
||||||
|
#include <map>
|
||||||
|
#include <common/logger_useful.h>
|
||||||
|
|
||||||
#if defined(__linux__)
|
#if defined(__linux__)
|
||||||
#include <linux/taskstats.h>
|
#include <linux/taskstats.h>
|
||||||
@ -35,6 +37,17 @@ namespace ProfileEvents
|
|||||||
extern const Event OSReadBytes;
|
extern const Event OSReadBytes;
|
||||||
extern const Event OSWriteBytes;
|
extern const Event OSWriteBytes;
|
||||||
|
|
||||||
|
extern const Event PERF_COUNT_HW_CPU_CYCLES;
|
||||||
|
extern const Event PERF_COUNT_HW_INSTRUCTIONS;
|
||||||
|
extern const Event PERF_COUNT_HW_CACHE_REFERENCES;
|
||||||
|
extern const Event PERF_COUNT_HW_CACHE_MISSES;
|
||||||
|
extern const Event PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
|
||||||
|
extern const Event PERF_COUNT_HW_BRANCH_MISSES;
|
||||||
|
extern const Event PERF_COUNT_HW_BUS_CYCLES;
|
||||||
|
extern const Event PERF_COUNT_HW_STALLED_CYCLES_FRONTEND;
|
||||||
|
extern const Event PERF_COUNT_HW_STALLED_CYCLES_BACKEND;
|
||||||
|
extern const Event PERF_COUNT_HW_REF_CPU_CYCLES;
|
||||||
|
|
||||||
// extern const Event PERF_COUNT_SW_CPU_CLOCK;
|
// extern const Event PERF_COUNT_SW_CPU_CLOCK;
|
||||||
extern const Event PERF_COUNT_SW_TASK_CLOCK;
|
extern const Event PERF_COUNT_SW_TASK_CLOCK;
|
||||||
extern const Event PERF_COUNT_SW_PAGE_FAULTS;
|
extern const Event PERF_COUNT_SW_PAGE_FAULTS;
|
||||||
@ -44,6 +57,9 @@ namespace ProfileEvents
|
|||||||
extern const Event PERF_COUNT_SW_PAGE_FAULTS_MAJ;
|
extern const Event PERF_COUNT_SW_PAGE_FAULTS_MAJ;
|
||||||
extern const Event PERF_COUNT_SW_ALIGNMENT_FAULTS;
|
extern const Event PERF_COUNT_SW_ALIGNMENT_FAULTS;
|
||||||
extern const Event PERF_COUNT_SW_EMULATION_FAULTS;
|
extern const Event PERF_COUNT_SW_EMULATION_FAULTS;
|
||||||
|
|
||||||
|
extern const Event PERF_CUSTOM_INSTRUCTIONS_PER_CPU_CYCLE_SCALED;
|
||||||
|
extern const Event PERF_CUSTOM_INSTRUCTIONS_PER_CPU_CYCLE;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -127,6 +143,18 @@ struct RUsageCounters
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#if defined(__linux__)
|
||||||
|
|
||||||
|
struct PerfEventInfo {
|
||||||
|
// see perf_event.h/perf_type_id enum
|
||||||
|
int event_type;
|
||||||
|
// see configs in perf_event.h
|
||||||
|
int event_config;
|
||||||
|
ProfileEvents::Event profile_event;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
struct PerfEventsCounters
|
struct PerfEventsCounters
|
||||||
{
|
{
|
||||||
// cat /proc/sys/kernel/perf_event_paranoid - if perf_event_paranoid is set to 3, all calls to `perf_event_open` are rejected (even for the current process)
|
// cat /proc/sys/kernel/perf_event_paranoid - if perf_event_paranoid is set to 3, all calls to `perf_event_open` are rejected (even for the current process)
|
||||||
@ -143,18 +171,28 @@ struct PerfEventsCounters
|
|||||||
// todo: check whether perf_event_open() is available with CAP_SYS_ADMIN
|
// todo: check whether perf_event_open() is available with CAP_SYS_ADMIN
|
||||||
|
|
||||||
#if defined(__linux__)
|
#if defined(__linux__)
|
||||||
static constexpr size_t NUMBER_OF_EVENTS = 8;
|
static constexpr size_t NUMBER_OF_RAW_EVENTS = 18;
|
||||||
|
|
||||||
static const int perf_event_configs[];
|
static const PerfEventInfo perf_raw_events_info[];
|
||||||
static const ProfileEvents::Event perf_events[];
|
static const std::map<int, int> event_config_to_info_index;
|
||||||
|
|
||||||
int events_descriptors[NUMBER_OF_EVENTS];
|
int events_descriptors[NUMBER_OF_RAW_EVENTS];
|
||||||
|
// temp array just to not create it each time event processing finishes
|
||||||
|
long long raw_event_values[NUMBER_OF_RAW_EVENTS];
|
||||||
bool perf_events_recording = false;
|
bool perf_events_recording = false;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static void initializeProfileEvents(PerfEventsCounters & counters);
|
static void initializeProfileEvents(PerfEventsCounters & counters);
|
||||||
|
|
||||||
static void finalizeProfileEvents(PerfEventsCounters & counters, ProfileEvents::Counters & profile_events);
|
static void finalizeProfileEvents(PerfEventsCounters & counters, ProfileEvents::Counters & profile_events);
|
||||||
|
|
||||||
|
private:
|
||||||
|
// used to write information about perf event availability only once for all threads
|
||||||
|
static std::atomic<bool> events_availability_logged;
|
||||||
|
|
||||||
|
static Logger * getLogger();
|
||||||
|
|
||||||
|
long long getRawValue(int event_config);
|
||||||
};
|
};
|
||||||
|
|
||||||
#if defined(__linux__)
|
#if defined(__linux__)
|
||||||
|
Loading…
Reference in New Issue
Block a user