Commit 0d8385a7 authored by Tvrtko Ursulin

intel-gpu-overlay: Catch-up to new i915 PMU

v2: Update for i915 changes.
v3: Use 1eN for large numbers. (Chris Wilson)
v4: Update for upstream engine class enum.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
parent 19c99246
/*
* Copyright © 2017 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
*/
#ifndef I915_PERF_H
#define I915_PERF_H
......@@ -5,41 +29,52 @@
#include <linux/perf_event.h>
#define I915_SAMPLE_BUSY 0
#define I915_SAMPLE_WAIT 1
#define I915_SAMPLE_SEMA 2
/*
 * Engine class identifiers. Together with an instance number, a class
 * selects which engine a per-engine PMU event refers to.
 *
 * Every value is written out explicitly; INVALID is the only negative one.
 */
enum drm_i915_gem_engine_class {
	I915_ENGINE_CLASS_INVALID = -1,

	I915_ENGINE_CLASS_RENDER = 0,
	I915_ENGINE_CLASS_COPY = 1,
	I915_ENGINE_CLASS_VIDEO = 2,
	I915_ENGINE_CLASS_VIDEO_ENHANCE = 3
};
/*
 * Kinds of per-engine sample that can be requested. The value is placed in
 * the sample field of an engine event id.
 */
enum drm_i915_pmu_engine_sample {
	I915_SAMPLE_BUSY = 0,
	I915_SAMPLE_WAIT = 1,
	I915_SAMPLE_SEMA = 2,

	/* non-ABI: one past the last sample kind listed above */
	I915_ENGINE_SAMPLE_MAX
};
#define I915_SAMPLE_RCS 0
#define I915_SAMPLE_VCS 1
#define I915_SAMPLE_BCS 2
#define I915_SAMPLE_VECS 3
/*
 * Field widths for an engine event id: the sample kind uses the low
 * I915_PMU_SAMPLE_BITS bits (mask I915_PMU_SAMPLE_MASK), the engine instance
 * uses the next I915_PMU_SAMPLE_INSTANCE_BITS bits, and the engine class
 * starts at bit I915_PMU_CLASS_SHIFT (4 + 8 = 12).
 */
#define I915_PMU_SAMPLE_BITS (4)
#define I915_PMU_SAMPLE_MASK (0xf)
#define I915_PMU_SAMPLE_INSTANCE_BITS (8)
#define I915_PMU_CLASS_SHIFT \
(I915_PMU_SAMPLE_BITS + I915_PMU_SAMPLE_INSTANCE_BITS)
#define __I915_PERF_COUNT(ring, id) ((ring) << 4 | (id))
/*
 * Compose an engine event id from (class, instance, sample): class is shifted
 * left by I915_PMU_CLASS_SHIFT, instance by I915_PMU_SAMPLE_BITS, and sample
 * is OR-ed in unshifted. The arguments are not masked or range-checked here,
 * so an out-of-range field overlaps its neighbour.
 */
#define __I915_PMU_ENGINE(class, instance, sample) \
((class) << I915_PMU_CLASS_SHIFT | \
(instance) << I915_PMU_SAMPLE_BITS | \
(sample))
#define I915_PERF_COUNT_RCS_BUSY __I915_PERF_COUNT(I915_SAMPLE_RCS, I915_SAMPLE_BUSY)
#define I915_PERF_COUNT_RCS_WAIT __I915_PERF_COUNT(I915_SAMPLE_RCS, I915_SAMPLE_WAIT)
#define I915_PERF_COUNT_RCS_SEMA __I915_PERF_COUNT(I915_SAMPLE_RCS, I915_SAMPLE_SEMA)
#define I915_PMU_ENGINE_BUSY(class, instance) \
__I915_PMU_ENGINE(class, instance, I915_SAMPLE_BUSY)
#define I915_PERF_COUNT_VCS_BUSY __I915_PERF_COUNT(I915_SAMPLE_VCS, I915_SAMPLE_BUSY)
#define I915_PERF_COUNT_VCS_WAIT __I915_PERF_COUNT(I915_SAMPLE_VCS, I915_SAMPLE_WAIT)
#define I915_PERF_COUNT_VCS_SEMA __I915_PERF_COUNT(I915_SAMPLE_VCS, I915_SAMPLE_SEMA)
#define I915_PMU_ENGINE_WAIT(class, instance) \
__I915_PMU_ENGINE(class, instance, I915_SAMPLE_WAIT)
#define I915_PERF_COUNT_BCS_BUSY __I915_PERF_COUNT(I915_SAMPLE_BCS, I915_SAMPLE_BUSY)
#define I915_PERF_COUNT_BCS_WAIT __I915_PERF_COUNT(I915_SAMPLE_BCS, I915_SAMPLE_WAIT)
#define I915_PERF_COUNT_BCS_SEMA __I915_PERF_COUNT(I915_SAMPLE_BCS, I915_SAMPLE_SEMA)
#define I915_PMU_ENGINE_SEMA(class, instance) \
__I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA)
#define I915_PERF_COUNT_VECS_BUSY __I915_PERF_COUNT(I915_SAMPLE_VECS, I915_SAMPLE_BUSY)
#define I915_PERF_COUNT_VECS_WAIT __I915_PERF_COUNT(I915_SAMPLE_VECS, I915_SAMPLE_WAIT)
#define I915_PERF_COUNT_VECS_SEMA __I915_PERF_COUNT(I915_SAMPLE_VECS, I915_SAMPLE_SEMA)
/*
 * Ids for non-engine events. Numbering starts one past
 * __I915_PMU_ENGINE(0xff, 0xff, 0xf), which evaluates to 0xfffff, so
 * __I915_PMU_OTHER(0) is 0x100000 and x counts upward from there.
 */
#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x))
#define I915_PERF_ACTUAL_FREQUENCY 32
#define I915_PERF_REQUESTED_FREQUENCY 33
#define I915_PERF_ENERGY 34
#define I915_PERF_INTERRUPTS 35
/* Non-engine event ids, numbered consecutively through __I915_PMU_OTHER(). */
#define I915_PMU_ACTUAL_FREQUENCY __I915_PMU_OTHER(0)
#define I915_PMU_REQUESTED_FREQUENCY __I915_PMU_OTHER(1)
#define I915_PMU_INTERRUPTS __I915_PMU_OTHER(2)
#define I915_PMU_RC6_RESIDENCY __I915_PMU_OTHER(3)
#define I915_PMU_RC6p_RESIDENCY __I915_PMU_OTHER(4)
#define I915_PMU_RC6pp_RESIDENCY __I915_PMU_OTHER(5)
#define I915_PERF_RC6_RESIDENCY 40
#define I915_PERF_RC6p_RESIDENCY 41
#define I915_PERF_RC6pp_RESIDENCY 42
#define I915_PMU_LAST I915_PMU_RC6pp_RESIDENCY
static inline int
perf_event_open(struct perf_event_attr *attr,
......
......@@ -113,7 +113,7 @@ int gem_interrupts_init(struct gem_interrupts *irqs)
{
memset(irqs, 0, sizeof(*irqs));
irqs->fd = perf_i915_open(I915_PERF_INTERRUPTS);
irqs->fd = perf_i915_open(I915_PMU_INTERRUPTS);
if (irqs->fd < 0 && interrupts_read() < 0)
irqs->error = ENODEV;
......
......@@ -37,8 +37,8 @@ static int perf_open(void)
{
int fd;
fd = perf_i915_open_group(I915_PERF_ACTUAL_FREQUENCY, -1);
if (perf_i915_open_group(I915_PERF_REQUESTED_FREQUENCY, fd) < 0) {
fd = perf_i915_open_group(I915_PMU_ACTUAL_FREQUENCY, -1);
if (perf_i915_open_group(I915_PMU_REQUESTED_FREQUENCY, fd) < 0) {
close(fd);
fd = -1;
}
......@@ -176,8 +176,8 @@ int gpu_freq_update(struct gpu_freq *gf)
return EAGAIN;
}
gf->current = (s->act - d->act) / d_time;
gf->request = (s->req - d->req) / d_time;
gf->current = (s->act - d->act) * 1e9 / d_time;
gf->request = (s->req - d->req) * 1e9 / d_time;
}
return 0;
......
......@@ -43,49 +43,57 @@
#define RING_WAIT (1<<11)
#define RING_WAIT_SEMAPHORE (1<<10)
#define __I915_PERF_RING(n) (4*n)
#define I915_PERF_RING_BUSY(n) (__I915_PERF_RING(n) + 0)
#define I915_PERF_RING_WAIT(n) (__I915_PERF_RING(n) + 1)
#define I915_PERF_RING_SEMA(n) (__I915_PERF_RING(n) + 2)
static int perf_init(struct gpu_top *gt)
{
const char *names[] = {
"RCS",
"BCS",
"VCS0",
"VCS1",
NULL,
struct engine_desc {
unsigned class, inst;
const char *name;
} *d, engines[] = {
{ I915_ENGINE_CLASS_RENDER, 0, "rcs0" },
{ I915_ENGINE_CLASS_COPY, 0, "bcs0" },
{ I915_ENGINE_CLASS_VIDEO, 0, "vcs0" },
{ I915_ENGINE_CLASS_VIDEO, 1, "vcs1" },
{ I915_ENGINE_CLASS_VIDEO_ENHANCE, 0, "vecs0" },
{ 0, 0, NULL }
};
int n;
gt->fd = perf_i915_open_group(I915_PERF_RING_BUSY(0), -1);
d = &engines[0];
gt->fd = perf_i915_open_group(I915_PMU_ENGINE_BUSY(d->class, d->inst),
-1);
if (gt->fd < 0)
return -1;
if (perf_i915_open_group(I915_PERF_RING_WAIT(0), gt->fd) >= 0)
if (perf_i915_open_group(I915_PMU_ENGINE_WAIT(d->class, d->inst),
gt->fd) >= 0)
gt->have_wait = 1;
if (perf_i915_open_group(I915_PERF_RING_SEMA(0), gt->fd) >= 0)
if (perf_i915_open_group(I915_PMU_ENGINE_SEMA(d->class, d->inst),
gt->fd) >= 0)
gt->have_sema = 1;
gt->ring[0].name = names[0];
gt->ring[0].name = d->name;
gt->num_rings = 1;
for (n = 1; names[n]; n++) {
if (perf_i915_open_group(I915_PERF_RING_BUSY(n), gt->fd) >= 0) {
if (gt->have_wait &&
perf_i915_open_group(I915_PERF_RING_WAIT(n),
gt->fd) < 0)
return -1;
if (gt->have_sema &&
perf_i915_open_group(I915_PERF_RING_SEMA(n),
gt->fd) < 0)
return -1;
gt->ring[gt->num_rings++].name = names[n];
}
for (d++; d->name; d++) {
if (perf_i915_open_group(I915_PMU_ENGINE_BUSY(d->class,
d->inst),
gt->fd) < 0)
continue;
if (gt->have_wait &&
perf_i915_open_group(I915_PMU_ENGINE_WAIT(d->class,
d->inst),
gt->fd) < 0)
return -1;
if (gt->have_sema &&
perf_i915_open_group(I915_PMU_ENGINE_SEMA(d->class,
d->inst),
gt->fd) < 0)
return -1;
gt->ring[gt->num_rings++].name = d->name;
}
return 0;
......
......@@ -45,9 +45,7 @@ int power_init(struct power *power)
memset(power, 0, sizeof(*power));
power->fd = perf_i915_open(I915_PERF_ENERGY);
if (power->fd != -1)
return 0;
power->fd = -1;
sprintf(buf, "%s/i915_energy_uJ", debugfs_dri_path);
fd = open(buf, 0);
......
......@@ -43,15 +43,15 @@ static int perf_open(unsigned *flags)
{
int fd;
fd = perf_i915_open_group(I915_PERF_RC6_RESIDENCY, -1);
fd = perf_i915_open_group(I915_PMU_RC6_RESIDENCY, -1);
if (fd < 0)
return -1;
*flags |= RC6;
if (perf_i915_open_group(I915_PERF_RC6p_RESIDENCY, fd) >= 0)
if (perf_i915_open_group(I915_PMU_RC6p_RESIDENCY, fd) >= 0)
*flags |= RC6p;
if (perf_i915_open_group(I915_PERF_RC6pp_RESIDENCY, fd) >= 0)
if (perf_i915_open_group(I915_PMU_RC6pp_RESIDENCY, fd) >= 0)
*flags |= RC6pp;
return fd;
......@@ -132,11 +132,11 @@ int rc6_update(struct rc6 *rc6)
len = 2;
if (rc6->flags & RC6)
s->rc6_residency = data[len++];
s->rc6_residency = data[len++] / 1e6;
if (rc6->flags & RC6p)
s->rc6p_residency = data[len++];
s->rc6p_residency = data[len++] / 1e6;
if (rc6->flags & RC6pp)
s->rc6pp_residency = data[len++];
s->rc6pp_residency = data[len++] / 1e6;
}
if (rc6->count == 1)
......@@ -149,14 +149,14 @@ int rc6_update(struct rc6 *rc6)
}
d_rc6 = s->rc6_residency - d->rc6_residency;
rc6->rc6 = (100 * d_rc6 + d_time/2) / d_time;
rc6->rc6 = 100 * d_rc6 / d_time;
d_rc6p = s->rc6p_residency - d->rc6p_residency;
rc6->rc6p = (100 * d_rc6p + d_time/2) / d_time;
rc6->rc6p = 100 * d_rc6p / d_time;
d_rc6pp = s->rc6pp_residency - d->rc6pp_residency;
rc6->rc6pp = (100 * d_rc6pp + d_time/2) / d_time;
rc6->rc6pp = 100 * d_rc6pp / d_time;
rc6->rc6_combined = (100 * (d_rc6 + d_rc6p + d_rc6pp) + d_time/2) / d_time;
rc6->rc6_combined = 100 * (d_rc6 + d_rc6p + d_rc6pp) / d_time;
return 0;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment