mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-27 12:10:49 +00:00
Add TLB misses perf counters.
This commit is contained in:
parent
1b1c32fe89
commit
af8d62bbcb
@ -196,8 +196,12 @@
|
||||
M(PerfCpuMigrations, "Number of times the process has migrated to a new CPU") \
|
||||
M(PerfAlignmentFaults, "Number of alignment faults. These happen when unaligned memory accesses happen; the kernel can handle these but it reduces performance. This happens only on some architectures (never on x86).") \
|
||||
M(PerfEmulationFaults, "Number of emulation faults. The kernel sometimes traps on unimplemented instructions and emulates them for user space. This can negatively impact performance.") \
|
||||
M(PerfPageFaultsMinor, "This counts the number of minor page faults. These did not require disk I/O to handle.") \
|
||||
M(PerfPageFaultsMajor, "This counts the number of major page faults. These required disk I/O to handle.") \
|
||||
M(PerfMinEnabledTime, "For all events, minimum time that an event was enabled. Used to track event multiplexing influence") \
|
||||
M(PerfMinEnabledRunningTime, "Running time for event with minimum enabled time. Used to track the amount of event multiplexing") \
|
||||
M(PerfDataTLBReferences, "Data TLB references") \
|
||||
M(PerfDataTLBMisses, "Data TLB misses") \
|
||||
M(PerfInstructionTLBReferences, "Instruction TLB references") \
|
||||
M(PerfInstructionTLBMisses, "Instruction TLB misses") \
|
||||
\
|
||||
M(CreatedHTTPConnections, "Total amount of created HTTP connections (closed or opened).") \
|
||||
\
|
||||
|
@ -147,6 +147,19 @@ thread_local PerfEventsCounters current_thread_counters;
|
||||
.settings_name = #LOCAL_NAME \
|
||||
}
|
||||
|
||||
// Describes a hardware cache perf event (PERF_TYPE_HW_CACHE). Used in pairs:
// one event for cache accesses and one for cache misses.
|
||||
// PERF_NAME is OR-ed into the low bits of event_config (the cache id, e.g.
// PERF_COUNT_HW_CACHE_DTLB); only read operations (PERF_COUNT_HW_CACHE_OP_READ)
// are counted. LOCAL_NAME names the ProfileEvents counter and, stringified,
// becomes settings_name. TYPE is ACCESS or MISS and is pasted onto
// PERF_COUNT_HW_CACHE_RESULT_ to select the result kind.
|
||||
#define CACHE_EVENT(PERF_NAME, LOCAL_NAME, TYPE) \
|
||||
PerfEventInfo \
|
||||
{ \
|
||||
.event_type = perf_type_id::PERF_TYPE_HW_CACHE, \
|
||||
.event_config = (PERF_NAME) \
|
||||
| (PERF_COUNT_HW_CACHE_OP_READ << 8) \
|
||||
| (PERF_COUNT_HW_CACHE_RESULT_ ## TYPE << 16), \
|
||||
.profile_event = ProfileEvents::LOCAL_NAME, \
|
||||
.settings_name = #LOCAL_NAME \
|
||||
}
|
||||
|
||||
// descriptions' source: http://man7.org/linux/man-pages/man2/perf_event_open.2.html
|
||||
static const PerfEventInfo raw_events_info[] = {
|
||||
HARDWARE_EVENT(PERF_COUNT_HW_CPU_CYCLES, PerfCpuCycles),
|
||||
@ -167,8 +180,16 @@ static const PerfEventInfo raw_events_info[] = {
|
||||
SOFTWARE_EVENT(PERF_COUNT_SW_CPU_MIGRATIONS, PerfCpuMigrations),
|
||||
SOFTWARE_EVENT(PERF_COUNT_SW_ALIGNMENT_FAULTS, PerfAlignmentFaults),
|
||||
SOFTWARE_EVENT(PERF_COUNT_SW_EMULATION_FAULTS, PerfEmulationFaults),
|
||||
SOFTWARE_EVENT(PERF_COUNT_SW_PAGE_FAULTS_MIN, PerfPageFaultsMinor),
|
||||
SOFTWARE_EVENT(PERF_COUNT_SW_PAGE_FAULTS_MAJ, PerfPageFaultsMajor)
|
||||
|
||||
// Don't add them -- they are the same as SoftPageFaults and HardPageFaults,
|
||||
// match well numerically.
|
||||
// SOFTWARE_EVENT(PERF_COUNT_SW_PAGE_FAULTS_MIN, PerfPageFaultsMinor),
|
||||
// SOFTWARE_EVENT(PERF_COUNT_SW_PAGE_FAULTS_MAJ, PerfPageFaultsMajor),
|
||||
|
||||
CACHE_EVENT(PERF_COUNT_HW_CACHE_DTLB, PerfDataTLBReferences, ACCESS),
|
||||
CACHE_EVENT(PERF_COUNT_HW_CACHE_DTLB, PerfDataTLBMisses, MISS),
|
||||
// Instruction TLB counters must use the ITLB cache id; with DTLB they would
// merely duplicate the PerfDataTLB* counters.
CACHE_EVENT(PERF_COUNT_HW_CACHE_ITLB, PerfInstructionTLBReferences, ACCESS),
|
||||
CACHE_EVENT(PERF_COUNT_HW_CACHE_ITLB, PerfInstructionTLBMisses, MISS),
|
||||
};
|
||||
|
||||
static_assert(sizeof(raw_events_info) / sizeof(raw_events_info[0]) == NUMBER_OF_RAW_EVENTS);
|
||||
@ -455,7 +476,12 @@ void PerfEventsCounters::finalizeProfileEvents(ProfileEvents::Counters & profile
|
||||
}
|
||||
}
|
||||
|
||||
// actually process counters' values
|
||||
// Actually process counters' values. Track the minimal time that a performance
|
||||
// counter was enabled, and the corresponding running time, to give some idea
|
||||
// about the amount of counter multiplexing.
|
||||
UInt64 min_enabled_time = -1;
|
||||
UInt64 running_time_for_min_enabled_time = 0;
|
||||
|
||||
for (size_t i = 0; i < NUMBER_OF_RAW_EVENTS; ++i)
|
||||
{
|
||||
int fd = thread_events_descriptors_holder.descriptors[i];
|
||||
@ -469,14 +495,30 @@ void PerfEventsCounters::finalizeProfileEvents(ProfileEvents::Counters & profile
|
||||
// Account for counter multiplexing. time_running and time_enabled are
|
||||
// not reset by PERF_EVENT_IOC_RESET, so we don't use it and calculate
|
||||
// deltas from old values.
|
||||
const auto enabled = current_value.time_enabled - previous_value.time_enabled;
|
||||
const auto running = current_value.time_running - previous_value.time_running;
|
||||
const UInt64 delta = (current_value.value - previous_value.value)
|
||||
* (current_value.time_enabled - previous_value.time_enabled)
|
||||
/ std::max(1.f,
|
||||
float(current_value.time_running - previous_value.time_running));
|
||||
* enabled / std::max(1.f, float(running));
|
||||
|
||||
if (min_enabled_time > enabled)
|
||||
{
|
||||
min_enabled_time = enabled;
|
||||
running_time_for_min_enabled_time = running;
|
||||
}
|
||||
|
||||
profile_events.increment(info.profile_event, delta);
|
||||
}
|
||||
|
||||
// If we had at least one enabled event, also show multiplexing-related
|
||||
// statistics.
|
||||
if (min_enabled_time != UInt64(-1))
|
||||
{
|
||||
profile_events.increment(ProfileEvents::PerfMinEnabledTime,
|
||||
min_enabled_time);
|
||||
profile_events.increment(ProfileEvents::PerfMinEnabledRunningTime,
|
||||
running_time_for_min_enabled_time);
|
||||
}
|
||||
|
||||
// Store current counter values for the next profiling period.
|
||||
memcpy(previous_values, current_values, sizeof(current_values));
|
||||
}
|
||||
|
@ -53,8 +53,12 @@ namespace ProfileEvents
|
||||
extern const Event PerfCpuMigrations;
|
||||
extern const Event PerfAlignmentFaults;
|
||||
extern const Event PerfEmulationFaults;
|
||||
extern const Event PerfPageFaultsMinor;
|
||||
extern const Event PerfPageFaultsMajor;
|
||||
extern const Event PerfMinEnabledTime;
|
||||
extern const Event PerfMinEnabledRunningTime;
|
||||
extern const Event PerfDataTLBReferences;
|
||||
extern const Event PerfDataTLBMisses;
|
||||
extern const Event PerfInstructionTLBReferences;
|
||||
extern const Event PerfInstructionTLBMisses;
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -158,7 +162,7 @@ struct PerfEventValue
|
||||
UInt64 time_running = 0;
|
||||
};
|
||||
|
||||
static constexpr size_t NUMBER_OF_RAW_EVENTS = 18;
|
||||
static constexpr size_t NUMBER_OF_RAW_EVENTS = 20;
|
||||
|
||||
struct PerfDescriptorsHolder : boost::noncopyable
|
||||
{
|
||||
|
@ -191,6 +191,10 @@ void ThreadStatus::finalizePerformanceCounters()
|
||||
performance_counters_finalized = true;
|
||||
updatePerformanceCounters();
|
||||
|
||||
// We want to close perf file descriptors if the perf events were enabled for
|
||||
// one query. What this code does in practice is less clear -- e.g., if I run
|
||||
// 'select 1 settings metrics_perf_events_enabled = 1', I still get
|
||||
// query_context->getSettingsRef().metrics_perf_events_enabled == 0 *shrug*.
|
||||
bool close_perf_descriptors = true;
|
||||
if (query_context)
|
||||
close_perf_descriptors = !query_context->getSettingsRef().metrics_perf_events_enabled;
|
||||
|
Loading…
Reference in New Issue
Block a user