/*
 * Copyright © 2013 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <assert.h>

#include "igt_perf.h"

#include "igfx.h"
#include "gpu-top.h"

/*
 * Per-ring register layout used by the MMIO sampling fallback.
 *
 * RING_TAIL, RING_HEAD and RING_CTL are offsets that mmio_ring_read() adds to
 * a ring's base address.  ADDR_MASK is applied to the head and tail values
 * before they are compared in mmio_ring_sample().  RING_WAIT and
 * RING_WAIT_SEMAPHORE are bits tested in the value read from RING_CTL.
 */
#define RING_TAIL      0x00
#define RING_HEAD      0x04
#define ADDR_MASK      0x001FFFFC
#define RING_CTL       0x0C
#define   RING_WAIT		(1<<11)
#define   RING_WAIT_SEMAPHORE	(1<<10)

46 47
static int perf_init(struct gpu_top *gt)
{
48 49 50 51 52 53 54 55 56 57
	struct engine_desc {
		unsigned class, inst;
		const char *name;
	} *d, engines[] = {
		{ I915_ENGINE_CLASS_RENDER, 0, "rcs0" },
		{ I915_ENGINE_CLASS_COPY, 0, "bcs0" },
		{ I915_ENGINE_CLASS_VIDEO, 0, "vcs0" },
		{ I915_ENGINE_CLASS_VIDEO, 1, "vcs1" },
		{ I915_ENGINE_CLASS_VIDEO_ENHANCE, 0, "vecs0" },
		{ 0, 0, NULL }
58 59
	};

60 61 62 63
	d = &engines[0];

	gt->fd = perf_i915_open_group(I915_PMU_ENGINE_BUSY(d->class, d->inst),
				      -1);
64 65 66
	if (gt->fd < 0)
		return -1;

67 68
	if (perf_i915_open_group(I915_PMU_ENGINE_WAIT(d->class, d->inst),
				 gt->fd) >= 0)
69 70
		gt->have_wait = 1;

71 72
	if (perf_i915_open_group(I915_PMU_ENGINE_SEMA(d->class, d->inst),
				 gt->fd) >= 0)
73 74
		gt->have_sema = 1;

75
	gt->ring[0].name = d->name;
76 77
	gt->num_rings = 1;

78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96
	for (d++; d->name; d++) {
		if (perf_i915_open_group(I915_PMU_ENGINE_BUSY(d->class,
							      d->inst),
					gt->fd) < 0)
			continue;

		if (gt->have_wait &&
		    perf_i915_open_group(I915_PMU_ENGINE_WAIT(d->class,
							      d->inst),
					 gt->fd) < 0)
			return -1;

		if (gt->have_sema &&
		    perf_i915_open_group(I915_PMU_ENGINE_SEMA(d->class,
							      d->inst),
				   gt->fd) < 0)
			return -1;

		gt->ring[gt->num_rings++].name = d->name;
97 98 99 100 101 102
	}

	return 0;
}

/* State for one ring sampled directly through MMIO. */
struct mmio_ring {
	int id;		/* slot in the emitted payload array; -1 disables the ring */
	uint32_t base;	/* added to the register offset on every read */
	void *mmio;	/* mapping handed to igfx_read() */
	int idle;	/* samples in which head == tail */
	int wait;	/* samples in which RING_WAIT was set */
	int sema;	/* samples in which RING_WAIT_SEMAPHORE was set */
};

109
static uint32_t mmio_ring_read(struct mmio_ring *ring, uint32_t reg)
110
{
111
	return igfx_read(ring->mmio, ring->base + reg);
112 113
}

/*
 * Report whether the i915 module parameter "enable_execlists" is non-zero.
 *
 * Returns 1 if the sysfs file could be opened and holds a non-zero integer,
 * 0 otherwise (including when the file is missing or unparsable).
 */
static int has_execlists(void)
{
	int detected = 0;
	FILE *file;

	file = fopen("/sys/module/i915/parameters/enable_execlists", "r");
	if (file) {
		int value;

		if (fscanf(file, "%d", &value) == 1)
			detected = value != 0;
		fclose(file);
	}

	return detected;
}

131
static void mmio_ring_init(struct mmio_ring *ring, void *mmio)
132 133 134
{
	uint32_t ctl;

135
	ring->mmio = mmio;
136 137

	ctl = mmio_ring_read(ring, RING_CTL);
138
	if ((ctl & 1) == 0 && !has_execlists())
139 140 141
		ring->id = -1;
}

142
static void mmio_ring_reset(struct mmio_ring *ring)
143 144 145 146 147 148
{
	ring->idle = 0;
	ring->wait = 0;
	ring->sema = 0;
}

149
static void mmio_ring_sample(struct mmio_ring *ring)
150 151 152 153 154 155
{
	uint32_t head, tail, ctl;

	if (ring->id == -1)
		return;

156 157
	head = mmio_ring_read(ring, RING_HEAD) & ADDR_MASK;
	tail = mmio_ring_read(ring, RING_TAIL) & ADDR_MASK;
158 159
	ring->idle += head == tail;

160
	ctl = mmio_ring_read(ring, RING_CTL);
161 162 163 164
	ring->wait += !!(ctl & RING_WAIT);
	ring->sema += !!(ctl & RING_WAIT_SEMAPHORE);
}

165
static void mmio_ring_emit(struct mmio_ring *ring, int samples, union gpu_top_payload *payload)
166 167 168 169 170 171 172 173 174
{
	if (ring->id == -1)
		return;

	payload[ring->id].u.busy = 100 - 100 * ring->idle / samples;
	payload[ring->id].u.wait = 100 * ring->wait / samples;
	payload[ring->id].u.sema = 100 * ring->sema / samples;
}

175
static void mmio_init(struct gpu_top *gt)
176
{
177 178
	struct mmio_ring render_ring = {
		.base = 0x2030,
179 180
		.id = 0,
	}, bsd_ring = {
181
		.base = 0x4030,
182 183
		.id = 1,
	}, bsd6_ring = {
184
		.base = 0x12030,
185 186
		.id = 1,
	}, blt_ring = {
187
		.base = 0x22030,
188 189 190 191
		.id = 2,
	};
	const struct igfx_info *info;
	struct pci_device *igfx;
192
	void *mmio;
193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208
	int fd[2], i;

	igfx = igfx_get();
	if (!igfx)
		return;

	if (pipe(fd) < 0)
		return;

	info = igfx_get_info(igfx);

	switch (fork()) {
	case -1: return;
	default:
		 fcntl(fd[0], F_SETFL, fcntl(fd[0], F_GETFL) | O_NONBLOCK);
		 gt->fd = fd[0];
209
		 gt->type = MMIO;
210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227
		 gt->ring[0].name = "render";
		 gt->num_rings = 1;
		 if (info->gen >= 040) {
			 gt->ring[1].name = "bitstream";
			 gt->num_rings++;
		 }
		 if (info->gen >= 060) {
			 gt->ring[2].name = "blt";
			 gt->num_rings++;
		 }
		 close(fd[1]);
		 return;
	case 0:
		 close(fd[0]);
		 break;
	}

	mmio = igfx_get_mmio(igfx);
228 229
	if (mmio == NULL)
		exit(127);
230

231
	mmio_ring_init(&render_ring, mmio);
232
	if (info->gen >= 060) {
233
		bsd_ring = bsd6_ring;
234
		mmio_ring_init(&blt_ring, mmio);
235 236
	}
	if (info->gen >= 040) {
237
		mmio_ring_init(&bsd_ring, mmio);
238 239 240 241 242
	}

	for (;;) {
		union gpu_top_payload payload[MAX_RINGS];

243 244 245
		mmio_ring_reset(&render_ring);
		mmio_ring_reset(&bsd_ring);
		mmio_ring_reset(&blt_ring);
246 247

		for (i = 0; i < 1000; i++) {
248 249 250
			mmio_ring_sample(&render_ring);
			mmio_ring_sample(&bsd_ring);
			mmio_ring_sample(&blt_ring);
251 252 253
			usleep(1000);
		}

254
		memset(payload, 0, sizeof(payload));
255 256 257
		mmio_ring_emit(&render_ring, 1000, payload);
		mmio_ring_emit(&bsd_ring, 1000, payload);
		mmio_ring_emit(&blt_ring, 1000, payload);
258 259
		assert(write(fd[1], payload, sizeof(payload))
		       == sizeof(payload));
260 261 262
	}
}

263 264 265 266 267 268 269 270 271 272 273
void gpu_top_init(struct gpu_top *gt)
{
	memset(gt, 0, sizeof(*gt));
	gt->fd = -1;

	if (perf_init(gt) == 0)
		return;

	mmio_init(gt);
}

274 275 276
int gpu_top_update(struct gpu_top *gt)
{
	uint32_t data[1024];
277
	int update, len;
278 279

	if (gt->fd < 0)
280 281 282 283 284 285
		return 0;

	if (gt->type == PERF) {
		struct gpu_top_stat *s = &gt->stat[gt->count++&1];
		struct gpu_top_stat *d = &gt->stat[gt->count&1];
		uint64_t *sample, d_time;
286
		int n, m;
287 288 289 290 291 292 293 294

		len = read(gt->fd, data, sizeof(data));
		if (len < 0)
			return 0;

		sample = (uint64_t *)data + 1;

		s->time = *sample++;
295 296
		for (n = m = 0; n < gt->num_rings; n++) {
			s->busy[n] = sample[m++];
297
			if (gt->have_wait)
298
				s->wait[n] = sample[m++];
299
			if (gt->have_sema)
300
				s->sema[n] = sample[m++];
301 302 303 304 305 306 307
		}

		if (gt->count == 1)
			return 0;

		d_time = s->time - d->time;
		for (n = 0; n < gt->num_rings; n++) {
308
			gt->ring[n].u.u.busy = (100 * (s->busy[n] - d->busy[n]) + d_time/2) / d_time;
309
			if (gt->have_wait)
310
				gt->ring[n].u.u.wait = (100 * (s->wait[n] - d->wait[n]) + d_time/2) / d_time;
311
			if (gt->have_sema)
312 313 314 315 316 317 318 319 320
				gt->ring[n].u.u.sema = (100 * (s->sema[n] - d->sema[n]) + d_time/2) / d_time;

			/* in case of rounding + sampling errors, fudge */
			if (gt->ring[n].u.u.busy > 100)
				gt->ring[n].u.u.busy = 100;
			if (gt->ring[n].u.u.wait > 100)
				gt->ring[n].u.u.wait = 100;
			if (gt->ring[n].u.u.sema > 100)
				gt->ring[n].u.u.sema = 100;
321 322
		}

323
		update = 1;
324 325 326 327 328 329 330 331 332
	} else {
		while ((len = read(gt->fd, data, sizeof(data))) > 0) {
			uint32_t *ptr = &data[len/sizeof(uint32_t) - MAX_RINGS];
			gt->ring[0].u.payload = ptr[0];
			gt->ring[1].u.payload = ptr[1];
			gt->ring[2].u.payload = ptr[2];
			gt->ring[3].u.payload = ptr[3];
			update = 1;
		}
333 334 335 336
	}

	return update;
}