2020-02-04 20:47:30 +00:00
|
|
|
#include <Common/ThreadProfileEvents.h>
|
|
|
|
|
|
|
|
#if defined(__linux__)
|
|
|
|
#include <unistd.h>
|
|
|
|
#include <linux/perf_event.h>
|
|
|
|
#include <syscall.h>
|
|
|
|
#include <sys/ioctl.h>
|
2020-02-19 16:35:01 +00:00
|
|
|
#include <cerrno>
|
2020-02-04 20:47:30 +00:00
|
|
|
#endif
|
|
|
|
|
|
|
|
namespace DB {
|
|
|
|
|
|
|
|
#if defined(__linux__)
|
|
|
|
|
2020-02-21 18:43:23 +00:00
|
|
|
static PerfEventInfo softwareEvent(int event_config, ProfileEvents::Event profile_event)
|
|
|
|
{
|
|
|
|
return PerfEventInfo
|
|
|
|
{
|
|
|
|
.event_type = perf_type_id::PERF_TYPE_SOFTWARE,
|
|
|
|
.event_config = event_config,
|
|
|
|
.profile_event = profile_event
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
|
|
|
static PerfEventInfo hardwareEvent(int event_config, ProfileEvents::Event profile_event)
|
|
|
|
{
|
|
|
|
return PerfEventInfo
|
|
|
|
{
|
|
|
|
.event_type = perf_type_id::PERF_TYPE_HARDWARE,
|
|
|
|
.event_config = event_config,
|
|
|
|
.profile_event = profile_event
|
|
|
|
};
|
|
|
|
}
|
2020-02-19 16:35:01 +00:00
|
|
|
|
|
|
|
// descriptions' source: http://man7.org/linux/man-pages/man2/perf_event_open.2.html
|
2020-02-21 18:43:23 +00:00
|
|
|
const PerfEventInfo PerfEventsCounters::perf_raw_events_info[] = {
|
2020-02-21 18:59:08 +00:00
|
|
|
hardwareEvent(PERF_COUNT_HW_CPU_CYCLES, ProfileEvents::PERF_COUNT_HW_CPU_CYCLES),
|
|
|
|
hardwareEvent(PERF_COUNT_HW_INSTRUCTIONS, ProfileEvents::PERF_COUNT_HW_INSTRUCTIONS),
|
|
|
|
hardwareEvent(PERF_COUNT_HW_CACHE_REFERENCES, ProfileEvents::PERF_COUNT_HW_CACHE_REFERENCES),
|
|
|
|
hardwareEvent(PERF_COUNT_HW_CACHE_MISSES, ProfileEvents::PERF_COUNT_HW_CACHE_MISSES),
|
|
|
|
hardwareEvent(PERF_COUNT_HW_BRANCH_INSTRUCTIONS, ProfileEvents::PERF_COUNT_HW_BRANCH_INSTRUCTIONS),
|
|
|
|
hardwareEvent(PERF_COUNT_HW_BRANCH_MISSES, ProfileEvents::PERF_COUNT_HW_BRANCH_MISSES),
|
|
|
|
hardwareEvent(PERF_COUNT_HW_BUS_CYCLES, ProfileEvents::PERF_COUNT_HW_BUS_CYCLES),
|
|
|
|
hardwareEvent(PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, ProfileEvents::PERF_COUNT_HW_STALLED_CYCLES_FRONTEND),
|
|
|
|
hardwareEvent(PERF_COUNT_HW_STALLED_CYCLES_BACKEND, ProfileEvents::PERF_COUNT_HW_STALLED_CYCLES_BACKEND),
|
|
|
|
hardwareEvent(PERF_COUNT_HW_REF_CPU_CYCLES, ProfileEvents::PERF_COUNT_HW_REF_CPU_CYCLES),
|
2020-02-21 18:43:23 +00:00
|
|
|
|
2020-02-19 16:35:01 +00:00
|
|
|
// This reports the CPU clock, a high-resolution per-CPU timer.
|
2020-02-21 18:43:23 +00:00
|
|
|
// a bit broken according to this: https://stackoverflow.com/a/56967896
|
|
|
|
// makeInfo(perf_type_id::PERF_TYPE_SOFTWARE, perf_sw_ids::PERF_COUNT_SW_CPU_CLOCK, ProfileEvents::PERF_COUNT_SW_CPU_CLOCK),
|
2020-02-21 18:59:08 +00:00
|
|
|
softwareEvent(PERF_COUNT_SW_TASK_CLOCK, ProfileEvents::PERF_COUNT_SW_TASK_CLOCK),
|
|
|
|
softwareEvent(PERF_COUNT_SW_PAGE_FAULTS, ProfileEvents::PERF_COUNT_SW_PAGE_FAULTS),
|
|
|
|
softwareEvent(PERF_COUNT_SW_CONTEXT_SWITCHES, ProfileEvents::PERF_COUNT_SW_CONTEXT_SWITCHES),
|
|
|
|
softwareEvent(PERF_COUNT_SW_CPU_MIGRATIONS, ProfileEvents::PERF_COUNT_SW_CPU_MIGRATIONS),
|
|
|
|
softwareEvent(PERF_COUNT_SW_PAGE_FAULTS_MIN, ProfileEvents::PERF_COUNT_SW_PAGE_FAULTS_MIN),
|
|
|
|
softwareEvent(PERF_COUNT_SW_PAGE_FAULTS_MAJ, ProfileEvents::PERF_COUNT_SW_PAGE_FAULTS_MAJ),
|
|
|
|
softwareEvent(PERF_COUNT_SW_ALIGNMENT_FAULTS, ProfileEvents::PERF_COUNT_SW_ALIGNMENT_FAULTS),
|
|
|
|
softwareEvent(PERF_COUNT_SW_EMULATION_FAULTS, ProfileEvents::PERF_COUNT_SW_EMULATION_FAULTS)
|
2020-02-19 16:35:01 +00:00
|
|
|
// This is a placeholder event that counts nothing. Informational sample record types such as mmap or
|
|
|
|
// comm must be associated with an active event. This dummy event allows gathering such records
|
|
|
|
// without requiring a counting event.
|
2020-02-21 18:43:23 +00:00
|
|
|
// softwareEventInfo(perf_sw_ids::PERF_COUNT_SW_DUMMY, ProfileEvents::PERF_COUNT_SW_DUMMY)
|
2020-02-04 20:47:30 +00:00
|
|
|
};
|
|
|
|
|
2020-02-21 18:43:23 +00:00
|
|
|
static_assert(std::size(PerfEventsCounters::perf_raw_events_info) == PerfEventsCounters::NUMBER_OF_RAW_EVENTS);
|
2020-02-04 20:47:30 +00:00
|
|
|
|
2020-02-21 18:43:23 +00:00
|
|
|
std::atomic<bool> PerfEventsCounters::events_availability_logged = false;
|
|
|
|
|
|
|
|
Logger * PerfEventsCounters::getLogger()
|
|
|
|
{
|
|
|
|
return &Logger::get("PerfEventsCounters");
|
|
|
|
}
|
|
|
|
|
2020-02-21 18:59:08 +00:00
|
|
|
long long PerfEventsCounters::getRawValue(int event_type, int event_config) const
|
2020-02-21 18:43:23 +00:00
|
|
|
{
|
2020-02-21 18:59:08 +00:00
|
|
|
for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i)
|
|
|
|
{
|
|
|
|
const PerfEventInfo & event_info = perf_raw_events_info[i];
|
|
|
|
if (event_info.event_type == event_type && event_info.event_config == event_config)
|
|
|
|
return raw_event_values[i];
|
|
|
|
}
|
|
|
|
|
|
|
|
LOG_WARNING(getLogger(), "Can't find perf event info for event_type=" << event_type << ", event_config=" << event_config);
|
|
|
|
return 0;
|
2020-02-21 18:43:23 +00:00
|
|
|
}
|
2020-02-04 20:47:30 +00:00
|
|
|
|
|
|
|
static int openPerfEvent(perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags) {
|
|
|
|
return static_cast<int>(syscall(SYS_perf_event_open, hw_event, pid, cpu, group_fd, flags));
|
|
|
|
}
|
|
|
|
|
|
|
|
static bool getPerfEventParanoid(int &result) {
|
|
|
|
// the longest possible variant: "-1\0"
|
|
|
|
constexpr int MAX_LENGTH = 3;
|
|
|
|
FILE *fp;
|
|
|
|
char str[MAX_LENGTH];
|
|
|
|
|
|
|
|
fp = fopen("/proc/sys/kernel/perf_event_paranoid", "r");
|
|
|
|
if (fp == nullptr)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
char *res = fgets(str, MAX_LENGTH, fp);
|
|
|
|
fclose(fp);
|
|
|
|
|
|
|
|
if (res == nullptr)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
str[MAX_LENGTH - 1] = '\0';
|
|
|
|
// todo: change to `strtol`
|
|
|
|
result = atoi(str);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2020-02-21 18:43:23 +00:00
|
|
|
static void perfEventOpenDisabled(int perf_event_paranoid, int perf_event_type, int perf_event_config, int &event_file_descriptor) {
|
2020-02-04 20:47:30 +00:00
|
|
|
perf_event_attr pe = perf_event_attr();
|
|
|
|
pe.type = perf_event_type;
|
|
|
|
pe.size = sizeof(struct perf_event_attr);
|
|
|
|
pe.config = perf_event_config;
|
2020-02-19 16:35:01 +00:00
|
|
|
// disable by default to add as little extra time as possible
|
2020-02-04 20:47:30 +00:00
|
|
|
pe.disabled = 1;
|
|
|
|
// can record kernel only when `perf_event_paranoid` <= 1
|
|
|
|
pe.exclude_kernel = perf_event_paranoid >= 2;
|
|
|
|
|
2020-02-19 16:35:01 +00:00
|
|
|
event_file_descriptor = openPerfEvent(&pe, /* measure the calling thread */ 0, /* on any cpu */ -1, -1, 0);
|
2020-02-04 20:47:30 +00:00
|
|
|
}
|
|
|
|
|
2020-02-19 16:35:01 +00:00
|
|
|
void PerfEventsCounters::initializeProfileEvents(PerfEventsCounters & counters) {
|
|
|
|
if (counters.perf_events_recording)
|
2020-02-04 20:47:30 +00:00
|
|
|
return;
|
|
|
|
|
|
|
|
int perf_event_paranoid = 0;
|
|
|
|
bool is_pref_available = getPerfEventParanoid(perf_event_paranoid);
|
|
|
|
// printf("is_perf_available: %s, perf_event_paranoid: %d\n", is_pref_available ? "true" : "false", perf_event_paranoid);
|
|
|
|
|
|
|
|
if (!is_pref_available)
|
|
|
|
return;
|
|
|
|
|
2020-02-21 18:43:23 +00:00
|
|
|
bool expected = false;
|
|
|
|
bool log_unsupported_event = events_availability_logged.compare_exchange_strong(expected, true);
|
|
|
|
for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i)
|
2020-02-04 20:47:30 +00:00
|
|
|
{
|
2020-02-21 18:43:23 +00:00
|
|
|
counters.raw_event_values[i] = 0;
|
|
|
|
const PerfEventInfo & event_info = perf_raw_events_info[i];
|
|
|
|
int & fd = counters.events_descriptors[i];
|
|
|
|
perfEventOpenDisabled(perf_event_paranoid, event_info.event_type, event_info.event_config, fd);
|
|
|
|
|
|
|
|
if (fd == -1 && log_unsupported_event)
|
|
|
|
{
|
|
|
|
LOG_WARNING(
|
|
|
|
getLogger(),
|
|
|
|
"Perf event is unsupported: event_type=" << event_info.event_type
|
|
|
|
<< ", event_config=" << event_info.event_config);
|
|
|
|
}
|
2020-02-04 20:47:30 +00:00
|
|
|
}
|
|
|
|
|
2020-02-21 18:43:23 +00:00
|
|
|
for (int fd : counters.events_descriptors)
|
2020-02-04 20:47:30 +00:00
|
|
|
{
|
2020-02-21 18:43:23 +00:00
|
|
|
if (fd != -1)
|
|
|
|
ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
|
2020-02-19 16:35:01 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
counters.perf_events_recording = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
void PerfEventsCounters::finalizeProfileEvents(PerfEventsCounters & counters, ProfileEvents::Counters & profile_events) {
|
|
|
|
if (!counters.perf_events_recording)
|
|
|
|
return;
|
|
|
|
|
2020-02-21 18:43:23 +00:00
|
|
|
// process raw events
|
|
|
|
|
|
|
|
// only read counters here to have as little overhead for processing as possible
|
|
|
|
for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i)
|
|
|
|
{
|
|
|
|
int fd = counters.events_descriptors[i];
|
|
|
|
if (fd != -1)
|
|
|
|
read(fd, &counters.raw_event_values[i], sizeof(long long));
|
|
|
|
}
|
|
|
|
|
|
|
|
// actually process counters' values and release resources
|
|
|
|
for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i)
|
|
|
|
{
|
2020-02-19 16:35:01 +00:00
|
|
|
int & fd = counters.events_descriptors[i];
|
|
|
|
if (fd == -1)
|
|
|
|
continue;
|
|
|
|
|
2020-02-21 18:43:23 +00:00
|
|
|
profile_events.increment(perf_raw_events_info[i].profile_event, counters.raw_event_values[i]);
|
2020-02-19 16:35:01 +00:00
|
|
|
|
|
|
|
if (ioctl(fd, PERF_EVENT_IOC_DISABLE, 0))
|
2020-02-21 18:43:23 +00:00
|
|
|
LOG_WARNING(getLogger(), "Can't disable perf event with file descriptor: " << fd);
|
2020-02-19 16:35:01 +00:00
|
|
|
|
|
|
|
if (close(fd))
|
2020-02-21 18:43:23 +00:00
|
|
|
LOG_WARNING(getLogger(), "Can't close perf event file descriptor: " << fd << "; error: " << errno << " - " << strerror(errno));
|
2020-02-19 16:35:01 +00:00
|
|
|
|
|
|
|
fd = -1;
|
2020-02-04 20:47:30 +00:00
|
|
|
}
|
|
|
|
|
2020-02-21 18:43:23 +00:00
|
|
|
// process custom events which depend on the raw ones
|
2020-02-21 18:59:08 +00:00
|
|
|
long long hw_cpu_cycles = counters.getRawValue(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES);
|
|
|
|
long long hw_ref_cpu_cycles = counters.getRawValue(PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES);
|
2020-02-21 18:43:23 +00:00
|
|
|
|
|
|
|
long long instructions_per_cpu_scaled = hw_cpu_cycles != 0
|
2020-02-21 18:59:08 +00:00
|
|
|
? counters.getRawValue(PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS) / hw_cpu_cycles
|
2020-02-21 18:43:23 +00:00
|
|
|
: 0;
|
|
|
|
long long instructions_per_cpu = hw_ref_cpu_cycles != 0
|
2020-02-21 18:59:08 +00:00
|
|
|
? counters.getRawValue(PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS) / hw_ref_cpu_cycles
|
2020-02-21 18:43:23 +00:00
|
|
|
: 0;
|
|
|
|
|
|
|
|
profile_events.increment(ProfileEvents::PERF_CUSTOM_INSTRUCTIONS_PER_CPU_CYCLE_SCALED, instructions_per_cpu_scaled);
|
|
|
|
profile_events.increment(ProfileEvents::PERF_CUSTOM_INSTRUCTIONS_PER_CPU_CYCLE, instructions_per_cpu);
|
|
|
|
|
2020-02-19 16:35:01 +00:00
|
|
|
counters.perf_events_recording = false;
|
2020-02-04 20:47:30 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
#else
|
|
|
|
|
2020-02-19 16:35:01 +00:00
|
|
|
void PerfEventsCounters::initializeProfileEvents(PerfEventsCounters &) {}
|
|
|
|
void PerfEventsCounters::finalizeProfileEvents(PerfEventsCounters &, ProfileEvents::Counters &) {}
|
2020-02-04 20:47:30 +00:00
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
}
|