Commit a4eaf7f1, authored by Peter Zijlstra, committed by Ingo Molnar

perf: Rework the PMU methods

Replace pmu::{enable,disable,start,stop,unthrottle} with
pmu::{add,del,start,stop}, all of which take a flags argument.

The new interface extends the capability to stop a counter while
keeping it scheduled on the PMU. We replace the throttled state with
the generic stopped state.

This also allows us to efficiently stop/start counters over certain
code paths (like IRQ handlers).

It also allows scheduling a counter without it starting, allowing for
a generic frozen state (useful for rotating stopped counters).

The stopped state is implemented in two different ways, depending on
how the architecture implemented the throttled state:

 1) We disable the counter:
    a) the pmu has per-counter enable bits, we flip that
    b) we program a NOP event, preserving the counter state

 2) We store the counter state and ignore all read/overflow events
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: paulus <paulus@samba.org>
Cc: stephane eranian <eranian@googlemail.com>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Lin Ming <ming.m.lin@intel.com>
Cc: Yanmin <yanmin_zhang@linux.intel.com>
Cc: Deng-Cheng Zhu <dengcheng.zhu@gmail.com>
Cc: David Miller <davem@davemloft.net>
Cc: Michael Cree <mcree@orcon.net.nz>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent fa407f35
......@@ -307,7 +307,7 @@ static unsigned long alpha_perf_event_update(struct perf_event *event,
new_raw_count) != prev_raw_count)
goto again;
delta = (new_raw_count - (prev_raw_count & alpha_pmu->pmc_count_mask[idx])) + ovf;
delta = (new_raw_count - (prev_raw_count & alpha_pmu->pmc_count_mask[idx])) + ovf;
/* It is possible on very rare occasions that the PMC has overflowed
* but the interrupt is yet to come. Detect and fix this situation.
......@@ -402,14 +402,13 @@ static void maybe_change_configuration(struct cpu_hw_events *cpuc)
struct hw_perf_event *hwc = &pe->hw;
int idx = hwc->idx;
if (cpuc->current_idx[j] != PMC_NO_INDEX) {
cpuc->idx_mask |= (1<<cpuc->current_idx[j]);
continue;
if (cpuc->current_idx[j] == PMC_NO_INDEX) {
alpha_perf_event_set_period(pe, hwc, idx);
cpuc->current_idx[j] = idx;
}
alpha_perf_event_set_period(pe, hwc, idx);
cpuc->current_idx[j] = idx;
cpuc->idx_mask |= (1<<cpuc->current_idx[j]);
if (!(hwc->state & PERF_HES_STOPPED))
cpuc->idx_mask |= (1<<cpuc->current_idx[j]);
}
cpuc->config = cpuc->event[0]->hw.config_base;
}
......@@ -420,7 +419,7 @@ static void maybe_change_configuration(struct cpu_hw_events *cpuc)
* - this function is called from outside this module via the pmu struct
* returned from perf event initialisation.
*/
static int alpha_pmu_enable(struct perf_event *event)
static int alpha_pmu_add(struct perf_event *event, int flags)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
int n0;
......@@ -455,6 +454,10 @@ static int alpha_pmu_enable(struct perf_event *event)
}
}
hwc->state = PERF_HES_UPTODATE;
if (!(flags & PERF_EF_START))
hwc->state |= PERF_HES_STOPPED;
local_irq_restore(flags);
perf_pmu_enable(event->pmu);
......@@ -467,7 +470,7 @@ static int alpha_pmu_enable(struct perf_event *event)
* - this function is called from outside this module via the pmu struct
* returned from perf event initialisation.
*/
static void alpha_pmu_disable(struct perf_event *event)
static void alpha_pmu_del(struct perf_event *event, int flags)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
......@@ -514,13 +517,44 @@ static void alpha_pmu_read(struct perf_event *event)
}
static void alpha_pmu_unthrottle(struct perf_event *event)
static void alpha_pmu_stop(struct perf_event *event, int flags)
{
struct hw_perf_event *hwc = &event->hw;
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
if (!(hwc->state & PERF_HES_STOPPED)) {
cpuc->idx_mask &= !(1UL<<hwc->idx);
hwc->state |= PERF_HES_STOPPED;
}
if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
alpha_perf_event_update(event, hwc, hwc->idx, 0);
hwc->state |= PERF_HES_UPTODATE;
}
if (cpuc->enabled)
wrperfmon(PERFMON_CMD_ENABLE, (1UL<<hwc->idx));
}
/*
 * (Re)start a counter that is scheduled on the PMU but currently stopped.
 *
 * @event: event whose hardware counter should resume counting.
 * @flags: if PERF_EF_RELOAD is set the sample period is reprogrammed
 *         before the counter is enabled; the event count is expected
 *         to be PERF_HES_UPTODATE in that case.
 */
static void alpha_pmu_start(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	/* Starting an event that is not stopped is a caller bug. */
	if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
		return;

	if (flags & PERF_EF_RELOAD) {
		WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
		alpha_perf_event_set_period(event, hwc, hwc->idx);
	}

	hwc->state = 0;

	cpuc->idx_mask |= 1UL<<hwc->idx;
	/*
	 * Enable the counter only while the PMU as a whole is enabled.
	 * There must be no unconditional enable ahead of this check.
	 */
	if (cpuc->enabled)
		wrperfmon(PERFMON_CMD_ENABLE, (1UL<<hwc->idx));
}
......@@ -671,7 +705,7 @@ static int alpha_pmu_event_init(struct perf_event *event)
/*
* Main entry point - enable HW performance counters.
*/
static void alpha_pmu_pmu_enable(struct pmu *pmu)
static void alpha_pmu_enable(struct pmu *pmu)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
......@@ -697,7 +731,7 @@ static void alpha_pmu_pmu_enable(struct pmu *pmu)
* Main entry point - disable HW performance counters.
*/
static void alpha_pmu_pmu_disable(struct pmu *pmu)
static void alpha_pmu_disable(struct pmu *pmu)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
......@@ -711,13 +745,14 @@ static void alpha_pmu_pmu_disable(struct pmu *pmu)
}
static struct pmu pmu = {
.pmu_enable = alpha_pmu_pmu_enable,
.pmu_disable = alpha_pmu_pmu_disable,
.pmu_enable = alpha_pmu_enable,
.pmu_disable = alpha_pmu_disable,
.event_init = alpha_pmu_event_init,
.enable = alpha_pmu_enable,
.disable = alpha_pmu_disable,
.add = alpha_pmu_add,
.del = alpha_pmu_del,
.start = alpha_pmu_start,
.stop = alpha_pmu_stop,
.read = alpha_pmu_read,
.unthrottle = alpha_pmu_unthrottle,
};
......
......@@ -221,46 +221,56 @@ armpmu_event_update(struct perf_event *event,
}
static void
armpmu_disable(struct perf_event *event)
armpmu_read(struct perf_event *event)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;
WARN_ON(idx < 0);
clear_bit(idx, cpuc->active_mask);
armpmu->disable(hwc, idx);
barrier();
armpmu_event_update(event, hwc, idx);
cpuc->events[idx] = NULL;
clear_bit(idx, cpuc->used_mask);
/* Don't read disabled counters! */
if (hwc->idx < 0)
return;
perf_event_update_userpage(event);
armpmu_event_update(event, hwc, hwc->idx);
}
static void
armpmu_read(struct perf_event *event)
armpmu_stop(struct perf_event *event, int flags)
{
struct hw_perf_event *hwc = &event->hw;
/* Don't read disabled counters! */
if (hwc->idx < 0)
if (!armpmu)
return;
armpmu_event_update(event, hwc, hwc->idx);
/*
* ARM pmu always has to update the counter, so ignore
* PERF_EF_UPDATE, see comments in armpmu_start().
*/
if (!(hwc->state & PERF_HES_STOPPED)) {
armpmu->disable(hwc, hwc->idx);
barrier(); /* why? */
armpmu_event_update(event, hwc, hwc->idx);
hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
}
}
static void
armpmu_unthrottle(struct perf_event *event)
armpmu_start(struct perf_event *event, int flags)
{
struct hw_perf_event *hwc = &event->hw;
if (!armpmu)
return;
/*
* ARM pmu always has to reprogram the period, so ignore
* PERF_EF_RELOAD, see the comment below.
*/
if (flags & PERF_EF_RELOAD)
WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
hwc->state = 0;
/*
* Set the period again. Some counters can't be stopped, so when we
* were throttled we simply disabled the IRQ source and the counter
* were stopped we simply disabled the IRQ source and the counter
* may have been left counting. If we don't do this step then we may
* get an interrupt too soon or *way* too late if the overflow has
* happened since disabling.
......@@ -269,8 +279,25 @@ armpmu_unthrottle(struct perf_event *event)
armpmu->enable(hwc, hwc->idx);
}
/*
 * Take an event off the PMU: mark its counter inactive, stop it with a
 * final count update, release the counter slot and refresh the
 * userspace-visible page.
 */
static void
armpmu_del(struct perf_event *event, int flags)
{
	struct hw_perf_event *hw = &event->hw;
	struct cpu_hw_events *cpu_events = &__get_cpu_var(cpu_hw_events);
	int counter = hw->idx;

	WARN_ON(counter < 0);

	/* Mark the counter inactive before stopping it. */
	clear_bit(counter, cpu_events->active_mask);
	armpmu_stop(event, PERF_EF_UPDATE);

	/* Release the slot so the counter can be reused. */
	cpu_events->events[counter] = NULL;
	clear_bit(counter, cpu_events->used_mask);

	perf_event_update_userpage(event);
}
static int
armpmu_enable(struct perf_event *event)
armpmu_add(struct perf_event *event, int flags)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
......@@ -295,11 +322,9 @@ armpmu_enable(struct perf_event *event)
cpuc->events[idx] = event;
set_bit(idx, cpuc->active_mask);
/* Set the period for the event. */
armpmu_event_set_period(event, hwc, idx);
/* Enable the event. */
armpmu->enable(hwc, idx);
hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
if (flags & PERF_EF_START)
armpmu_start(event, PERF_EF_RELOAD);
/* Propagate our changes to the userspace mapping. */
perf_event_update_userpage(event);
......@@ -534,7 +559,7 @@ static int armpmu_event_init(struct perf_event *event)
return err;
}
static void armpmu_pmu_enable(struct pmu *pmu)
static void armpmu_enable(struct pmu *pmu)
{
/* Enable all of the perf events on hardware. */
int idx;
......@@ -555,20 +580,21 @@ static void armpmu_pmu_enable(struct pmu *pmu)
armpmu->start();
}
static void armpmu_pmu_disable(struct pmu *pmu)
/* Stop the PMU via its stop() hook; a no-op when armpmu is NULL. */
static void armpmu_disable(struct pmu *pmu)
{
	if (!armpmu)
		return;

	armpmu->stop();
}
static struct pmu pmu = {
.pmu_enable = armpmu_pmu_enable,
.pmu_disable= armpmu_pmu_disable,
.event_init = armpmu_event_init,
.enable = armpmu_enable,
.disable = armpmu_disable,
.unthrottle = armpmu_unthrottle,
.read = armpmu_read,
.pmu_enable = armpmu_enable,
.pmu_disable = armpmu_disable,
.event_init = armpmu_event_init,
.add = armpmu_add,
.del = armpmu_del,
.start = armpmu_start,
.stop = armpmu_stop,
.read = armpmu_read,
};
/*
......
......@@ -402,6 +402,9 @@ static void power_pmu_read(struct perf_event *event)
{
s64 val, delta, prev;
if (event->hw.state & PERF_HES_STOPPED)
return;
if (!event->hw.idx)
return;
/*
......@@ -517,7 +520,7 @@ static void write_mmcr0(struct cpu_hw_events *cpuhw, unsigned long mmcr0)
* Disable all events to prevent PMU interrupts and to allow
* events to be added or removed.
*/
static void power_pmu_pmu_disable(struct pmu *pmu)
static void power_pmu_disable(struct pmu *pmu)
{
struct cpu_hw_events *cpuhw;
unsigned long flags;
......@@ -565,7 +568,7 @@ static void power_pmu_pmu_disable(struct pmu *pmu)
* If we were previously disabled and events were added, then
* put the new config on the PMU.
*/
static void power_pmu_pmu_enable(struct pmu *pmu)
static void power_pmu_enable(struct pmu *pmu)
{
struct perf_event *event;
struct cpu_hw_events *cpuhw;
......@@ -672,6 +675,8 @@ static void power_pmu_pmu_enable(struct pmu *pmu)
}
local64_set(&event->hw.prev_count, val);
event->hw.idx = idx;
if (event->hw.state & PERF_HES_STOPPED)
val = 0;
write_pmc(idx, val);
perf_event_update_userpage(event);
}
......@@ -727,7 +732,7 @@ static int collect_events(struct perf_event *group, int max_count,
* re-enable the PMU in order to get hw_perf_enable to do the
* actual work of reconfiguring the PMU.
*/
static int power_pmu_enable(struct perf_event *event)
static int power_pmu_add(struct perf_event *event, int ef_flags)
{
struct cpu_hw_events *cpuhw;
unsigned long flags;
......@@ -749,6 +754,9 @@ static int power_pmu_enable(struct perf_event *event)
cpuhw->events[n0] = event->hw.config;
cpuhw->flags[n0] = event->hw.event_base;
if (!(ef_flags & PERF_EF_START))
event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
/*
* If group events scheduling transaction was started,
* skip the schedulability test here, it will be performed
......@@ -777,7 +785,7 @@ static int power_pmu_enable(struct perf_event *event)
/*
* Remove an event from the PMU.
*/
static void power_pmu_disable(struct perf_event *event)
static void power_pmu_del(struct perf_event *event, int ef_flags)
{
struct cpu_hw_events *cpuhw;
long i;
......@@ -826,27 +834,53 @@ static void power_pmu_disable(struct perf_event *event)
}
/*
* Re-enable interrupts on a event after they were throttled
* because they were coming too fast.
* POWER-PMU does not support disabling individual counters, hence
* program their cycle counter to their max value and ignore the interrupts.
*/
static void power_pmu_unthrottle(struct perf_event *event)
static void power_pmu_start(struct perf_event *event, int ef_flags)
{
s64 val, left;
unsigned long flags;
s64 left;
if (!event->hw.idx || !event->hw.sample_period)
return;
if (!(event->hw.state & PERF_HES_STOPPED))
return;
if (ef_flags & PERF_EF_RELOAD)
WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
local_irq_save(flags);
perf_pmu_disable(event->pmu);
event->hw.state = 0;
left = local64_read(&event->hw.period_left);
write_pmc(event->hw.idx, left);
perf_event_update_userpage(event);
perf_pmu_enable(event->pmu);
local_irq_restore(flags);
}
static void power_pmu_stop(struct perf_event *event, int ef_flags)
{
unsigned long flags;
if (!event->hw.idx || !event->hw.sample_period)
return;
if (event->hw.state & PERF_HES_STOPPED)
return;
local_irq_save(flags);
perf_pmu_disable(event->pmu);
power_pmu_read(event);
left = event->hw.sample_period;
event->hw.last_period = left;
val = 0;
if (left < 0x80000000L)
val = 0x80000000L - left;
write_pmc(event->hw.idx, val);
local64_set(&event->hw.prev_count, val);
local64_set(&event->hw.period_left, left);
event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
write_pmc(event->hw.idx, 0);
perf_event_update_userpage(event);
perf_pmu_enable(event->pmu);
local_irq_restore(flags);
......@@ -1131,13 +1165,14 @@ static int power_pmu_event_init(struct perf_event *event)
}
struct pmu power_pmu = {
.pmu_enable = power_pmu_pmu_enable,
.pmu_disable = power_pmu_pmu_disable,
.pmu_enable = power_pmu_enable,
.pmu_disable = power_pmu_disable,
.event_init = power_pmu_event_init,
.enable = power_pmu_enable,
.disable = power_pmu_disable,
.add = power_pmu_add,
.del = power_pmu_del,
.start = power_pmu_start,
.stop = power_pmu_stop,
.read = power_pmu_read,
.unthrottle = power_pmu_unthrottle,
.start_txn = power_pmu_start_txn,
.cancel_txn = power_pmu_cancel_txn,
.commit_txn = power_pmu_commit_txn,
......@@ -1155,6 +1190,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
s64 prev, delta, left;
int record = 0;
if (event->hw.state & PERF_HES_STOPPED) {
write_pmc(event->hw.idx, 0);
return;
}
/* we don't have to worry about interrupts here */
prev = local64_read(&event->hw.prev_count);
delta = (val - prev) & 0xfffffffful;
......@@ -1177,6 +1217,11 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
val = 0x80000000LL - left;
}
write_pmc(event->hw.idx, val);
local64_set(&event->hw.prev_count, val);
local64_set(&event->hw.period_left, left);
perf_event_update_userpage(event);
/*
* Finally record data if requested.
*/
......@@ -1189,23 +1234,9 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
if (event->attr.sample_type & PERF_SAMPLE_ADDR)
perf_get_data_addr(regs, &data.addr);
if (perf_event_overflow(event, nmi, &data, regs)) {
/*
* Interrupts are coming too fast - throttle them
* by setting the event to 0, so it will be
* at least 2^30 cycles until the next interrupt
* (assuming each event counts at most 2 counts
* per cycle).
*/
val = 0;
left = ~0ULL >> 1;
}
if (perf_event_overflow(event, nmi, &data, regs))
power_pmu_stop(event, 0);
}
write_pmc(event->hw.idx, val);
local64_set(&event->hw.prev_count, val);
local64_set(&event->hw.period_left, left);
perf_event_update_userpage(event);
}
/*
......
......@@ -156,6 +156,9 @@ static void fsl_emb_pmu_read(struct perf_event *event)
{
s64 val, delta, prev;
if (event->hw.state & PERF_HES_STOPPED)
return;
/*
* Performance monitor interrupts come even when interrupts
* are soft-disabled, as long as interrupts are hard-enabled.
......@@ -177,7 +180,7 @@ static void fsl_emb_pmu_read(struct perf_event *event)
* Disable all events to prevent PMU interrupts and to allow
* events to be added or removed.
*/
static void fsl_emb_pmu_pmu_disable(struct pmu *pmu)
static void fsl_emb_pmu_disable(struct pmu *pmu)
{
struct cpu_hw_events *cpuhw;
unsigned long flags;
......@@ -216,7 +219,7 @@ static void fsl_emb_pmu_pmu_disable(struct pmu *pmu)
* If we were previously disabled and events were added, then
* put the new config on the PMU.
*/
static void fsl_emb_pmu_pmu_enable(struct pmu *pmu)
static void fsl_emb_pmu_enable(struct pmu *pmu)
{
struct cpu_hw_events *cpuhw;
unsigned long flags;
......@@ -263,7 +266,7 @@ static int collect_events(struct perf_event *group, int max_count,
}
/* context locked on entry */
static int fsl_emb_pmu_enable(struct perf_event *event)
static int fsl_emb_pmu_add(struct perf_event *event, int flags)
{
struct cpu_hw_events *cpuhw;
int ret = -EAGAIN;
......@@ -302,6 +305,12 @@ static int fsl_emb_pmu_enable(struct perf_event *event)
val = 0x80000000L - left;
}
local64_set(&event->hw.prev_count, val);
if (!(flags & PERF_EF_START)) {
event->hw.state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
val = 0;
}
write_pmc(i, val);
perf_event_update_userpage(event);
......@@ -316,7 +325,7 @@ static int fsl_emb_pmu_enable(struct perf_event *event)
}
/* context locked on entry */
static void fsl_emb_pmu_disable(struct perf_event *event)
static void fsl_emb_pmu_del(struct perf_event *event, int flags)
{
struct cpu_hw_events *cpuhw;
int i = event->hw.idx;
......@@ -353,30 +362,49 @@ static void fsl_emb_pmu_disable(struct perf_event *event)
put_cpu_var(cpu_hw_events);
}
/*
* Re-enable interrupts on a event after they were throttled
* because they were coming too fast.
*
* Context is locked on entry, but perf is not disabled.
*/
static void fsl_emb_pmu_unthrottle(struct perf_event *event)
/*
 * Restart a stopped sampling event.
 *
 * @event:    event to restart; ignored unless it has a counter assigned
 *            (hw.idx >= 0), a sample period, and is currently stopped.
 * @ef_flags: PERF_EF_RELOAD only triggers a sanity check that the count
 *            is up to date; the counter is always reprogrammed.
 */
static void fsl_emb_pmu_start(struct perf_event *event, int ef_flags)
{
	unsigned long flags;
	s64 val, left;

	if (event->hw.idx < 0 || !event->hw.sample_period)
		return;

	if (!(event->hw.state & PERF_HES_STOPPED))
		return;

	if (ef_flags & PERF_EF_RELOAD)
		WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));

	local_irq_save(flags);
	perf_pmu_disable(event->pmu);

	event->hw.state = 0;
	left = local64_read(&event->hw.period_left);

	/*
	 * Preload the counter so that it reaches 0x80000000 after 'left'
	 * more events, the same way every other place in this file
	 * programs a PMC.  Writing 'left' itself would delay the next
	 * interrupt by (0x80000000 - left) events instead.
	 */
	val = 0;
	if (left < 0x80000000L)
		val = 0x80000000L - left;
	write_pmc(event->hw.idx, val);
	/* Keep prev_count in step with what was just written to the PMC. */
	local64_set(&event->hw.prev_count, val);

	perf_event_update_userpage(event);
	perf_pmu_enable(event->pmu);
	local_irq_restore(flags);
}
static void fsl_emb_pmu_stop(struct perf_event *event, int ef_flags)
{
s64 val, left;
unsigned long flags;
if (event->hw.idx < 0 || !event->hw.sample_period)
return;
if (event->hw.state & PERF_HES_STOPPED)
return;
local_irq_save(flags);
perf_pmu_disable(event->pmu);
fsl_emb_pmu_read(event);
left = event->hw.sample_period;
event->hw.last_period = left;
val = 0;
if (left < 0x80000000L)
val = 0x80000000L - left;
write_pmc(event->hw.idx, val);
local64_set(&event->hw.prev_count, val);
local64_set(&event->hw.period_left, left);
event->hw.state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
write_pmc(event->hw.idx, 0);
perf_event_update_userpage(event);