/* gem_exec_nop.c — i915 empty-batch submission latency tests (IGT) */
/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 */

#include <unistd.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <inttypes.h>
#include <errno.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/poll.h>
#include <sys/time.h>
#include <time.h>

#include "drm.h"
#include "i915/gem.h"
#include "igt.h"
#include "igt_device.h"
#include "igt_rand.h"
#include "igt_sysfs.h"

#define ENGINE_FLAGS  (I915_EXEC_RING_MASK | I915_EXEC_BSD_MASK)

#define MAX_PRIO I915_CONTEXT_MAX_USER_PRIORITY
#define MIN_PRIO I915_CONTEXT_MIN_USER_PRIORITY

#define MAX_ENGINES (I915_EXEC_RING_MASK + 1)

#define FORKED (1 << 0)
#define CONTEXT (1 << 1)
/* Seconds between two CLOCK_MONOTONIC samples, as a double. */
static double elapsed(const struct timespec *start, const struct timespec *end)
{
	double secs = end->tv_sec - start->tv_sec;
	double nsecs = end->tv_nsec - start->tv_nsec;

	return secs + nsecs * 1e-9;
}

65
static double nop_on_ring(int fd, uint32_t handle,
66
			  const struct intel_execution_engine2 *e,
67
			  int timeout_ms,
68
			  unsigned long *out)
69 70
{
	struct drm_i915_gem_execbuffer2 execbuf;
71 72
	struct drm_i915_gem_exec_object2 obj;
	struct timespec start, now;
73
	unsigned long count;
74

75 76
	memset(&obj, 0, sizeof(obj));
	obj.handle = handle;
77

78
	memset(&execbuf, 0, sizeof(execbuf));
79
	execbuf.buffers_ptr = to_user_pointer(&obj);
80
	execbuf.buffer_count = 1;
81
	execbuf.flags = e->flags;
82 83
	execbuf.flags |= I915_EXEC_HANDLE_LUT;
	execbuf.flags |= I915_EXEC_NO_RELOC;
84
	if (__gem_execbuf(fd, &execbuf)) {
85
		execbuf.flags = e->flags;
86
		gem_execbuf(fd, &execbuf);
87
	}
88
	intel_detect_and_clear_missed_interrupts(fd);
89

90
	count = 0;
91 92
	clock_gettime(CLOCK_MONOTONIC, &start);
	do {
93 94
		gem_execbuf(fd, &execbuf);
		count++;
95

96
		clock_gettime(CLOCK_MONOTONIC, &now);
97
	} while (elapsed(&start, &now) < timeout_ms * 1e-3);
98
	igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
99

100 101 102 103
	*out = count;
	return elapsed(&start, &now);
}

/*
 * Measure inter-batch latency on a single engine by ping-ponging between
 * two batches that live in the same bo.  Each batch stores its identity
 * (0 or 1) into the last dword of the bo and then spins in a
 * MI_BATCH_BUFFER_START loop on itself until the CPU rewrites its
 * MI_ARB_CHK into MI_BATCH_BUFFER_END, releasing it.
 */
static void poll_ring(int fd, const struct intel_execution_engine2 *e,
		      int timeout)
{
	const unsigned int gen = intel_gen(intel_get_drm_devid(fd));
	const uint32_t MI_ARB_CHK = 0x5 << 23;
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 obj;
	struct drm_i915_gem_relocation_entry reloc[4], *r;
	uint32_t *bbe[2], *state, *batch;
	struct timespec tv = {};
	unsigned long cycles;
	unsigned flags;
	uint64_t elapsed;

	flags = I915_EXEC_NO_RELOC;
	/* gen4/5 need a secure (master) batch for MI_STORE_DWORD_IMM */
	if (gen == 4 || gen == 5)
		flags |= I915_EXEC_SECURE;

	igt_require(gem_class_can_store_dword(fd, e->class));
	igt_require(gem_class_has_mutable_submission(fd, e->class));

	memset(&obj, 0, sizeof(obj));
	obj.handle = gem_create(fd, 4096);
	obj.relocs_ptr = to_user_pointer(reloc);
	obj.relocation_count = ARRAY_SIZE(reloc);

	r = memset(reloc, 0, sizeof(reloc));
	batch = gem_mmap__wc(fd, obj.handle, 0, 4096, PROT_WRITE);

	/* Build two batches, at byte offsets 0 and 128 within the bo. */
	for (unsigned int start_offset = 0;
	     start_offset <= 128;
	     start_offset += 128) {
		uint32_t *b = batch + start_offset / sizeof(*batch);

		/* STORE_DWORD of this batch's id into the last dword (4092) */
		r->target_handle = obj.handle;
		r->offset = (b - batch + 1) * sizeof(uint32_t);
		r->delta = 4092;
		r->read_domains = I915_GEM_DOMAIN_RENDER;

		*b = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
		if (gen >= 8) {
			*++b = r->delta;
			*++b = 0;
		} else if (gen >= 4) {
			r->offset += sizeof(uint32_t);
			*++b = 0;
			*++b = r->delta;
		} else {
			*b -= 1;
			*++b = r->delta;
		}
		*++b = start_offset != 0; /* the value written: batch id 0/1 */
		r++;

		/* Self-looping MI_BATCH_BUFFER_START at +64; the leading
		 * MI_ARB_CHK is the word the CPU later flips to BB_END. */
		b = batch + (start_offset + 64) / sizeof(*batch);
		bbe[start_offset != 0] = b;
		*b++ = MI_ARB_CHK;

		r->target_handle = obj.handle;
		r->offset = (b - batch + 1) * sizeof(uint32_t);
		r->read_domains = I915_GEM_DOMAIN_COMMAND;
		r->delta = start_offset + 64;
		if (gen >= 8) {
			*b++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
			*b++ = r->delta;
			*b++ = 0;
		} else if (gen >= 6) {
			*b++ = MI_BATCH_BUFFER_START | 1 << 8;
			*b++ = r->delta;
		} else {
			*b++ = MI_BATCH_BUFFER_START | 2 << 6;
			if (gen < 4)
				r->delta |= 1;
			*b++ = r->delta;
		}
		r++;
	}
	igt_assert(r == reloc + ARRAY_SIZE(reloc));
	state = batch + 1023; /* dword 4092: where each batch stores its id */

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = to_user_pointer(&obj);
	execbuf.buffer_count = 1;
	execbuf.flags = e->flags | flags;

	cycles = 0;
	do {
		unsigned int idx = ++cycles & 1;

		/* Arm batch[idx] (restore its spin) and submit it... */
		*bbe[idx] = MI_ARB_CHK;
		execbuf.batch_start_offset =
			(bbe[idx] - batch) * sizeof(*batch) - 64;

		gem_execbuf(fd, &execbuf);

		/* ...then release the previous batch and wait until the
		 * new one reports in via *state. */
		*bbe[!idx] = MI_BATCH_BUFFER_END;
		__sync_synchronize();

		while (READ_ONCE(*state) != idx)
			;
	} while ((elapsed = igt_nsec_elapsed(&tv)) >> 30 < timeout);
	*bbe[cycles & 1] = MI_BATCH_BUFFER_END; /* let the last batch finish */
	gem_sync(fd, obj.handle);

	igt_info("%s completed %ld cycles: %.3f us\n",
		 e->name, cycles, elapsed*1e-3/cycles);

	munmap(batch, 4096);
	gem_close(fd, obj.handle);
}

/*
 * Same ping-pong scheme as poll_ring(), but each submission round-robins
 * across all capable engines, and the "state" dword lives in a second bo
 * (snooped if possible) so the CPU can poll it cheaply.
 */
static void poll_sequential(int fd, const char *name, int timeout)
{
	const unsigned int gen = intel_gen(intel_get_drm_devid(fd));
	const struct intel_execution_engine2 *e;
	const uint32_t MI_ARB_CHK = 0x5 << 23;
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 obj[2];
	struct drm_i915_gem_relocation_entry reloc[4], *r;
	uint32_t *bbe[2], *state, *batch;
	unsigned engines[MAX_ENGINES], nengine, flags;
	struct timespec tv = {};
	unsigned long cycles;
	uint64_t elapsed;
	bool cached;

	flags = I915_EXEC_NO_RELOC;
	/* gen4/5 need a secure (master) batch for MI_STORE_DWORD_IMM */
	if (gen == 4 || gen == 5)
		flags |= I915_EXEC_SECURE;

	nengine = 0;
	__for_each_physical_engine(fd, e) {
		if (!gem_class_can_store_dword(fd, e->class) ||
		    !gem_class_has_mutable_submission(fd, e->class))
			continue;

		engines[nengine++] = e->flags;
	}

	igt_require(nengine);

	/* obj[0]: the CPU-polled state dword; obj[1]: the batch bo */
	memset(obj, 0, sizeof(obj));
	obj[0].handle = gem_create(fd, 4096);
	obj[0].flags = EXEC_OBJECT_WRITE;
	cached = __gem_set_caching(fd, obj[0].handle, 1) == 0;
	obj[1].handle = gem_create(fd, 4096);
	obj[1].relocs_ptr = to_user_pointer(reloc);
	obj[1].relocation_count = ARRAY_SIZE(reloc);

	r = memset(reloc, 0, sizeof(reloc));
	batch = gem_mmap__wc(fd, obj[1].handle, 0, 4096, PROT_WRITE);

	/* Build two batches, at byte offsets 0 and 128 within obj[1]. */
	for (unsigned int start_offset = 0;
	     start_offset <= 128;
	     start_offset += 128) {
		uint32_t *b = batch + start_offset / sizeof(*batch);

		/* STORE_DWORD of this batch's id into obj[0] */
		r->target_handle = obj[0].handle;
		r->offset = (b - batch + 1) * sizeof(uint32_t);
		r->delta = 0;
		r->read_domains = I915_GEM_DOMAIN_RENDER;
		r->write_domain = I915_GEM_DOMAIN_RENDER;

		*b = MI_STORE_DWORD_IMM | (gen < 6 ? 1 << 22 : 0);
		if (gen >= 8) {
			*++b = r->delta;
			*++b = 0;
		} else if (gen >= 4) {
			r->offset += sizeof(uint32_t);
			*++b = 0;
			*++b = r->delta;
		} else {
			*b -= 1;
			*++b = r->delta;
		}
		*++b = start_offset != 0; /* the value written: batch id 0/1 */
		r++;

		/* Self-looping MI_BATCH_BUFFER_START at +64; the leading
		 * MI_ARB_CHK is the word the CPU later flips to BB_END. */
		b = batch + (start_offset + 64) / sizeof(*batch);
		bbe[start_offset != 0] = b;
		*b++ = MI_ARB_CHK;

		r->target_handle = obj[1].handle;
		r->offset = (b - batch + 1) * sizeof(uint32_t);
		r->read_domains = I915_GEM_DOMAIN_COMMAND;
		r->delta = start_offset + 64;
		if (gen >= 8) {
			*b++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
			*b++ = r->delta;
			*b++ = 0;
		} else if (gen >= 6) {
			*b++ = MI_BATCH_BUFFER_START | 1 << 8;
			*b++ = r->delta;
		} else {
			*b++ = MI_BATCH_BUFFER_START | 2 << 6;
			if (gen < 4)
				r->delta |= 1;
			*b++ = r->delta;
		}
		r++;
	}
	igt_assert(r == reloc + ARRAY_SIZE(reloc));

	/* Prefer a cacheable CPU mapping of the state page when snoopable. */
	if (cached)
		state = gem_mmap__cpu(fd, obj[0].handle, 0, 4096, PROT_READ);
	else
		state = gem_mmap__wc(fd, obj[0].handle, 0, 4096, PROT_READ);

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = to_user_pointer(obj);
	execbuf.buffer_count = ARRAY_SIZE(obj);

	cycles = 0;
	do {
		unsigned int idx = ++cycles & 1;

		/* Arm batch[idx] and submit on the next engine in turn... */
		*bbe[idx] = MI_ARB_CHK;
		execbuf.batch_start_offset =
			(bbe[idx] - batch) * sizeof(*batch) - 64;

		execbuf.flags = engines[cycles % nengine] | flags;
		gem_execbuf(fd, &execbuf);

		/* ...then release the previous batch and wait for the new
		 * one to report in via the state dword. */
		*bbe[!idx] = MI_BATCH_BUFFER_END;
		__sync_synchronize();

		while (READ_ONCE(*state) != idx)
			;
	} while ((elapsed = igt_nsec_elapsed(&tv)) >> 30 < timeout);
	*bbe[cycles & 1] = MI_BATCH_BUFFER_END; /* let the last batch finish */
	gem_sync(fd, obj[1].handle);

	igt_info("%s completed %ld cycles: %.3f us\n",
		 name, cycles, elapsed*1e-3/cycles);

	munmap(state, 4096);
	munmap(batch, 4096);
	gem_close(fd, obj[1].handle);
	gem_close(fd, obj[0].handle);
}

345
static void single(int fd, uint32_t handle,
346
		   const struct intel_execution_engine2 *e)
347 348 349 350
{
	double time;
	unsigned long count;

351
	time = nop_on_ring(fd, handle, e, 20000, &count);
352
	igt_info("%s: %'lu cycles: %.3fus\n",
353
		  e->name, count, time*1e6 / count);
354 355
}

356
static double
357
stable_nop_on_ring(int fd, uint32_t handle,
358 359
		   const struct intel_execution_engine2 *e,
		   int timeout_ms,
360
		   int reps)
361 362 363 364 365 366 367 368 369 370 371 372 373
{
	igt_stats_t s;
	double n;

	igt_assert(reps >= 5);

	igt_stats_init_with_size(&s, reps);
	s.is_float = true;

	while (reps--) {
		unsigned long count;
		double time;

374
		time = nop_on_ring(fd, handle, e, timeout_ms, &count);
375 376 377 378 379 380 381 382 383 384 385 386 387 388 389
		igt_stats_push_float(&s, time / count);
	}

	n = igt_stats_get_median(&s);
	igt_stats_fini(&s);

	return n;
}

/* Assert that x lies within ±(tolerance * 100)% of ref. */
#define assert_within_epsilon(x, ref, tolerance) \
        igt_assert_f((x) <= (1.0 + tolerance) * ref && \
                     (x) >= (1.0 - tolerance) * ref, \
                     "'%s' != '%s' (%f not within %f%% tolerance of %f)\n",\
                     #x, #ref, x, tolerance * 100.0, ref)

390 391
static void headless(int fd, uint32_t handle,
		     const struct intel_execution_engine2 *e)
392 393
{
	unsigned int nr_connected = 0;
394
	double n_display, n_headless;
395
	drmModeConnector *connector;
396
	unsigned long count;
397 398 399
	drmModeRes *res;

	res = drmModeGetResources(fd);
400
	igt_require(res);
401 402 403 404 405 406 407 408 409 410 411 412 413

	/* require at least one connected connector for the test */
	for (int i = 0; i < res->count_connectors; i++) {
		connector = drmModeGetConnectorCurrent(fd, res->connectors[i]);
		if (connector->connection == DRM_MODE_CONNECTED)
			nr_connected++;
		drmModeFreeConnector(connector);
	}
	igt_require(nr_connected > 0);

	/* set graphics mode to prevent blanking */
	kmstest_set_vt_graphics_mode();

414 415 416
	nop_on_ring(fd, handle, e, 10, &count);
	igt_require_f(count > 100, "submillisecond precision required\n");

417
	/* benchmark nops */
418
	n_display = stable_nop_on_ring(fd, handle, e, 500, 5);
419 420
	igt_info("With one display connected: %.2fus\n",
		 n_display * 1e6);
421 422 423 424 425

	/* force all connectors off */
	kmstest_unset_all_crtcs(fd, res);

	/* benchmark nops again */
426
	n_headless = stable_nop_on_ring(fd, handle, e, 500, 5);
427 428
	igt_info("Without a display connected (headless): %.2fus\n",
		 n_headless * 1e6);
429 430 431 432 433

	/* check that the two execution speeds are roughly the same */
	assert_within_epsilon(n_headless, n_display, 0.1f);
}

/*
 * Measure nop throughput with one forked child hammering each engine
 * simultaneously, after first recording each engine's individual rate.
 */
static void parallel(int fd, uint32_t handle, int timeout)
{
	const struct intel_execution_engine2 *e;
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 obj;
	unsigned engines[MAX_ENGINES];
	char *names[MAX_ENGINES];
	unsigned nengine;
	unsigned long count;
	double time, sum;

	sum = 0;
	nengine = 0;

	/* Individual baseline for every engine. */
	__for_each_physical_engine(fd, e) {
		engines[nengine] = e->flags;
		names[nengine++] = strdup(e->name);

		time = nop_on_ring(fd, handle, e, 250, &count) / count;
		sum += time;
		igt_debug("%s: %.3fus\n", e->name, 1e6*time);
	}
	igt_require(nengine);
	igt_info("average (individually): %.3fus\n", sum/nengine*1e6);

	memset(&obj, 0, sizeof(obj));
	obj.handle = handle;

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = to_user_pointer(&obj);
	execbuf.buffer_count = 1;
	/* Prefer the cheap submission path; fall back for old kernels. */
	execbuf.flags |= I915_EXEC_HANDLE_LUT;
	execbuf.flags |= I915_EXEC_NO_RELOC;
	if (__gem_execbuf(fd, &execbuf)) {
		execbuf.flags = 0;
		gem_execbuf(fd, &execbuf);
	}
	intel_detect_and_clear_missed_interrupts(fd);

	/* One child per engine, all submitting concurrently. */
	igt_fork(child, nengine) {
		struct timespec start, now;

		execbuf.flags &= ~ENGINE_FLAGS;
		execbuf.flags |= engines[child];

		count = 0;
		clock_gettime(CLOCK_MONOTONIC, &start);
		do {
			gem_execbuf(fd, &execbuf);
			count++;

			clock_gettime(CLOCK_MONOTONIC, &now);
		} while (elapsed(&start, &now) < timeout);
		time = elapsed(&start, &now) / count;
		igt_info("%s: %ld cycles, %.3fus\n", names[child], count, 1e6*time);
	}
	/* Children have their own copies after fork; parent may free now. */
	while (nengine--)
		free(names[nengine]);

	igt_waitchildren();
	igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
}

/*
 * Like parallel(), but each forked child submits its own freshly created
 * batch bo, so the engines do not contend on a single object.
 */
static void independent(int fd, uint32_t handle, int timeout)
{
	const struct intel_execution_engine2 *e;
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 obj;
	unsigned engines[MAX_ENGINES];
	char *names[MAX_ENGINES];
	unsigned nengine;
	unsigned long count;
	double time, sum;

	sum = 0;
	nengine = 0;
	/* Individual baseline for every engine. */
	__for_each_physical_engine(fd, e) {
		engines[nengine] = e->flags;
		names[nengine++] = strdup(e->name);

		time = nop_on_ring(fd, handle, e, 250, &count) / count;
		sum += time;
		igt_debug("%s: %.3fus\n", e->name, 1e6*time);
	}
	igt_require(nengine);
	igt_info("average (individually): %.3fus\n", sum/nengine*1e6);

	memset(&obj, 0, sizeof(obj));
	obj.handle = handle;

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = to_user_pointer(&obj);
	execbuf.buffer_count = 1;
	/* Prefer the cheap submission path; fall back for old kernels. */
	execbuf.flags |= I915_EXEC_HANDLE_LUT;
	execbuf.flags |= I915_EXEC_NO_RELOC;
	if (__gem_execbuf(fd, &execbuf)) {
		execbuf.flags = 0;
		gem_execbuf(fd, &execbuf);
	}
	intel_detect_and_clear_missed_interrupts(fd);

	igt_fork(child, nengine) {
		const uint32_t bbe = MI_BATCH_BUFFER_END;
		struct timespec start, now;

		/* Private batch per child (obj was copied by fork). */
		obj.handle = gem_create(fd, 4096);
		gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));

		execbuf.flags &= ~ENGINE_FLAGS;
		execbuf.flags |= engines[child];

		count = 0;
		clock_gettime(CLOCK_MONOTONIC, &start);
		do {
			gem_execbuf(fd, &execbuf);
			count++;

			clock_gettime(CLOCK_MONOTONIC, &now);
		} while (elapsed(&start, &now) < timeout);
		time = elapsed(&start, &now) / count;
		igt_info("%s: %ld cycles, %.3fus\n", names[child], count, 1e6*time);

		gem_close(fd, obj.handle);
	}
	/* Children have their own copies after fork; parent may free now. */
	while (nengine--)
		free(names[nengine]);

	igt_waitchildren();
	igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);
}

/*
 * Hammer a single engine from one child per CPU, each child using its own
 * reopened device fd (and hence its own hardware context/ppgtt).
 */
static void multiple(int fd,
		     const struct intel_execution_engine2 *e,
		     int timeout)
{
	const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
	const uint32_t bbe = MI_BATCH_BUFFER_END;
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 obj;

	memset(&obj, 0, sizeof(obj));
	obj.handle = gem_create(fd, 4096);
	gem_write(fd, obj.handle, 0, &bbe, sizeof(bbe));

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = to_user_pointer(&obj);
	execbuf.buffer_count = 1;
	execbuf.flags = e->flags;
	/* Prefer the cheap submission path; fall back for old kernels. */
	execbuf.flags |= I915_EXEC_HANDLE_LUT;
	execbuf.flags |= I915_EXEC_NO_RELOC;
	if (__gem_execbuf(fd, &execbuf)) {
		execbuf.flags = e->flags;
		gem_execbuf(fd, &execbuf);
	}
	intel_detect_and_clear_missed_interrupts(fd);

	igt_fork(child, ncpus) {
		struct timespec start, now;
		unsigned long count;
		double time;
		int i915;

		/* Private fd; copy the engine map so e->flags still apply. */
		i915 = gem_reopen_driver(fd);
		gem_context_copy_engines(fd, 0, i915, 0);

		/* obj was copied by fork; point it at a handle valid on i915. */
		obj.handle = gem_create(i915, 4096);
		gem_write(i915, obj.handle, 0, &bbe, sizeof(bbe));

		count = 0;
		clock_gettime(CLOCK_MONOTONIC, &start);
		do {
			gem_execbuf(i915, &execbuf);
			count++;

			clock_gettime(CLOCK_MONOTONIC, &now);
		} while (elapsed(&start, &now) < timeout);
		time = elapsed(&start, &now) / count;
		igt_info("%d: %ld cycles, %.3fus\n", child, count, 1e6*time);
	}

	igt_waitchildren();
	igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);

	gem_close(fd, obj.handle);
}

/*
 * Submit the nop batch to every engine in strict sequence from a single
 * process and compare the combined cycle time against the per-engine
 * baselines ((max-min)/nengine + min would be the ideal pipelined cost).
 */
static void series(int fd, uint32_t handle, int timeout)
{
	const struct intel_execution_engine2 *e;
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 obj;
	struct timespec start, now, sync;
	unsigned engines[MAX_ENGINES];
	unsigned nengine;
	unsigned long count;
	double time, max = 0, min = HUGE_VAL, sum = 0;
	const char *name;

	/* Per-engine baseline; remember the slowest and fastest engines. */
	nengine = 0;
	__for_each_physical_engine(fd, e) {
		time = nop_on_ring(fd, handle, e, 250, &count) / count;
		if (time > max) {
			name = e->name;
			max = time;
		}
		if (time < min)
			min = time;
		sum += time;
		engines[nengine++] = e->flags;
	}
	igt_require(nengine);
	igt_info("Maximum execution latency on %s, %.3fus, min %.3fus, total %.3fus per cycle, average %.3fus\n",
		 name, max*1e6, min*1e6, sum*1e6, sum/nengine*1e6);

	memset(&obj, 0, sizeof(obj));
	obj.handle = handle;

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = to_user_pointer(&obj);
	execbuf.buffer_count = 1;
	/* Prefer the cheap submission path; fall back for old kernels. */
	execbuf.flags |= I915_EXEC_HANDLE_LUT;
	execbuf.flags |= I915_EXEC_NO_RELOC;
	if (__gem_execbuf(fd, &execbuf)) {
		execbuf.flags = 0;
		gem_execbuf(fd, &execbuf);
	}
	intel_detect_and_clear_missed_interrupts(fd);

	count = 0;
	clock_gettime(CLOCK_MONOTONIC, &start);
	do {
		/* One nop on each engine per loop iteration. */
		for (int n = 0; n < nengine; n++) {
			execbuf.flags &= ~ENGINE_FLAGS;
			execbuf.flags |= engines[n];
			gem_execbuf(fd, &execbuf);
		}
		count += nengine;
		clock_gettime(CLOCK_MONOTONIC, &now);
	} while (elapsed(&start, &now) < timeout); /* Hang detection ~120s */
	gem_sync(fd, handle);
	clock_gettime(CLOCK_MONOTONIC, &sync);
	igt_debug("sync time: %.3fus\n", elapsed(&now, &sync)*1e6);
	igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);

	time = elapsed(&start, &now) / count;
	igt_info("All (%d engines): %'lu cycles, average %.3fus per cycle [expected %.3fus]\n",
		 nengine, count, 1e6*time, 1e6*((max-min)/nengine+min));
}
/* Swap elements i and j of an unsigned array (igt_permute_array callback). */
static void xchg(void *array, unsigned i, unsigned j)
{
	unsigned *elems = array;
	unsigned a = elems[i];
	unsigned b = elems[j];

	elems[i] = b;
	elems[j] = a;
}

/*
 * Submit a two-object nop (a scratch write target plus @handle) across
 * all engines in a randomly permuted order, optionally from one process
 * per CPU (FORKED) and/or from a cloned context per child (CONTEXT).
 * Reports the average cycle time across children via a shared mmap.
 */
static void sequential(int fd, uint32_t handle, unsigned flags, int timeout)
{
	const int ncpus = flags & FORKED ? sysconf(_SC_NPROCESSORS_ONLN) : 1;
	const struct intel_execution_engine2 *e;
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 obj[2];
	unsigned engines[MAX_ENGINES];
	unsigned nengine;
	double *results;
	double time, sum;
	unsigned n;

	gem_require_contexts(fd);

	/* Shared page so forked children can report their results. */
	results = mmap(NULL, 4096, PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
	igt_assert(results != MAP_FAILED);

	nengine = 0;
	sum = 0;
	/* Individual baseline for every engine. */
	__for_each_physical_engine(fd, e) {
		unsigned long count;

		time = nop_on_ring(fd, handle, e, 250, &count) / count;
		sum += time;
		igt_debug("%s: %.3fus\n", e->name, 1e6*time);

		engines[nengine++] = e->flags;
	}
	igt_require(nengine);
	igt_info("Total (individual) execution latency %.3fus per cycle\n",
		 1e6*sum);

	memset(obj, 0, sizeof(obj));
	obj[0].handle = gem_create(fd, 4096);
	obj[0].flags = EXEC_OBJECT_WRITE;
	obj[1].handle = handle;

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = to_user_pointer(obj);
	execbuf.buffer_count = 2;
	execbuf.flags |= I915_EXEC_HANDLE_LUT;
	execbuf.flags |= I915_EXEC_NO_RELOC;
	igt_require(__gem_execbuf(fd, &execbuf) == 0);

	if (flags & CONTEXT) {
		gem_require_contexts(fd);
		execbuf.rsvd1 = gem_context_clone_with_engines(fd, 0);
	}

	/* Verify every engine accepts this execbuf before timing it. */
	for (n = 0; n < nengine; n++) {
		execbuf.flags &= ~ENGINE_FLAGS;
		execbuf.flags |= engines[n];
		igt_require(__gem_execbuf(fd, &execbuf) == 0);
	}

	intel_detect_and_clear_missed_interrupts(fd);

	igt_fork(child, ncpus) {
		struct timespec start, now;
		unsigned long count;

		/* Private scratch object per child (obj copied by fork). */
		obj[0].handle = gem_create(fd, 4096);
		gem_execbuf(fd, &execbuf);

		if (flags & CONTEXT) {
			gem_require_contexts(fd);
			execbuf.rsvd1 = gem_context_clone_with_engines(fd, 0);
		}

		/* Decorrelate each child's engine permutation sequence. */
		hars_petruska_f54_1_random_perturb(child);

		count = 0;
		clock_gettime(CLOCK_MONOTONIC, &start);
		do {
			igt_permute_array(engines, nengine, xchg);
			for (n = 0; n < nengine; n++) {
				execbuf.flags &= ~ENGINE_FLAGS;
				execbuf.flags |= engines[n];
				gem_execbuf(fd, &execbuf);
			}
			count++;
			clock_gettime(CLOCK_MONOTONIC, &now);
		} while (elapsed(&start, &now) < timeout);

		gem_sync(fd, obj[0].handle);
		clock_gettime(CLOCK_MONOTONIC, &now);
		results[child] = elapsed(&start, &now) / count;

		if (flags & CONTEXT)
			gem_context_destroy(fd, execbuf.rsvd1);

		gem_close(fd, obj[0].handle);
	}
	igt_waitchildren();
	igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);

	/* Average the per-child results in results[ncpus]. */
	results[ncpus] = 0;
	for (n = 0; n < ncpus; n++)
		results[ncpus] += results[n];
	results[ncpus] /= ncpus;

	igt_info("Sequential (%d engines, %d processes): average %.3fus per cycle [expected %.3fus]\n",
		 nengine, ncpus, 1e6*results[ncpus], 1e6*sum*ncpus);

	if (flags & CONTEXT)
		gem_context_destroy(fd, execbuf.rsvd1);

	gem_close(fd, obj[0].handle);
	munmap(results, 4096);
}

/*
 * Kick fence signaling with a zero-timeout poll; returns true when the
 * fence had not yet signalled at the time of the call.
 */
static bool fence_enable_signaling(int fence)
{
	struct pollfd pfd = { .fd = fence, .events = POLLIN };

	return poll(&pfd, 1, 0) == 0;
}

/* Block until the fence signals; true on a clean POLLIN wakeup. */
static bool fence_wait(int fence)
{
	struct pollfd pfd = { .fd = fence, .events = POLLIN };

	return poll(&pfd, 1, -1) == 1;
}

/*
 * Measure nop throughput while keeping a ring of NFENCES output fences
 * alive: each submission requests an out-fence, signaling is enabled via
 * a non-blocking poll, and the oldest slot is waited upon before reuse.
 * @ring_id == NULL means spread across all physical engines.
 */
static void fence_signal(int fd, uint32_t handle,
			 const struct intel_execution_engine2 *ring_id,
			 const char *ring_name, int timeout)
{
#define NFENCES 512
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 obj;
	struct intel_execution_engine2 *__e;
	struct timespec start, now;
	unsigned engines[MAX_ENGINES];
	unsigned nengine;
	int *fences, n;
	unsigned long count, signal;

	igt_require(gem_has_exec_fence(fd));

	nengine = 0;
	if (!ring_id) {
		/* No engine specified: use them all, round-robin. */
		__for_each_physical_engine(fd, __e)
			engines[nengine++] = __e->flags;
	} else {
		engines[nengine++] = ring_id->flags;
	}
	igt_require(nengine);

	/* Ring buffer of in-flight fence fds, -1 meaning empty slot. */
	fences = malloc(sizeof(*fences) * NFENCES);
	igt_assert(fences);
	memset(fences, -1, sizeof(*fences) * NFENCES);

	memset(&obj, 0, sizeof(obj));
	obj.handle = handle;

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = to_user_pointer(&obj);
	execbuf.buffer_count = 1;
	execbuf.flags = I915_EXEC_FENCE_OUT;

	n = 0;
	count = 0;
	signal = 0;

	intel_detect_and_clear_missed_interrupts(fd);
	clock_gettime(CLOCK_MONOTONIC, &start);
	do {
		for (int e = 0; e < nengine; e++) {
			/* Retire the oldest fence before reusing its slot. */
			if (fences[n] != -1) {
				igt_assert(fence_wait(fences[n]));
				close(fences[n]);
			}

			execbuf.flags &= ~ENGINE_FLAGS;
			execbuf.flags |= engines[e];
			gem_execbuf_wr(fd, &execbuf);

			/* Enable signaling by doing a poll() */
			fences[n] = execbuf.rsvd2 >> 32;
			signal += fence_enable_signaling(fences[n]);

			n = (n + 1) % NFENCES;
		}

		count += nengine;
		clock_gettime(CLOCK_MONOTONIC, &now);
	} while (elapsed(&start, &now) < timeout);
	igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);

	/* Drain any fences still held in the ring. */
	for (n = 0; n < NFENCES; n++)
		if (fences[n] != -1)
			close(fences[n]);
	free(fences);

	igt_info("Signal %s: %'lu cycles (%'lu signals): %.3fus\n",
		 ring_name, count, signal, elapsed(&start, &now) * 1e6 / count);
}

/*
 * Measure nop latency for a max-priority context while a min-priority
 * spinner occupies the same engine — every nop must preempt the spinner.
 */
static void preempt(int fd, uint32_t handle,
		    const struct intel_execution_engine2 *e)
{
	struct drm_i915_gem_execbuffer2 execbuf;
	struct drm_i915_gem_exec_object2 obj;
	struct timespec start, now;
	unsigned long count;
	uint32_t ctx[2];
	igt_spin_t *spin;

	/* ctx[0]: lowest priority, runs the spinner. */
	ctx[0] = gem_context_clone_with_engines(fd, 0);
	gem_context_set_priority(fd, ctx[0], MIN_PRIO);

	/* ctx[1]: highest priority, runs the measured nops. */
	ctx[1] = gem_context_clone_with_engines(fd, 0);
	gem_context_set_priority(fd, ctx[1], MAX_PRIO);

	memset(&obj, 0, sizeof(obj));
	obj.handle = handle;

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = to_user_pointer(&obj);
	execbuf.buffer_count = 1;
	execbuf.flags = e->flags;
	/* Prefer the cheap submission path; fall back for old kernels. */
	execbuf.flags |= I915_EXEC_HANDLE_LUT;
	execbuf.flags |= I915_EXEC_NO_RELOC;
	if (__gem_execbuf(fd, &execbuf)) {
		execbuf.flags = e->flags;
		gem_execbuf(fd, &execbuf);
	}
	execbuf.rsvd1 = ctx[1];
	intel_detect_and_clear_missed_interrupts(fd);

	count = 0;
	spin = __igt_spin_new(fd, .ctx = ctx[0], .engine = e->flags);
	clock_gettime(CLOCK_MONOTONIC, &start);
	do {
		gem_execbuf(fd, &execbuf);
		count++;
		clock_gettime(CLOCK_MONOTONIC, &now);
	} while (elapsed(&start, &now) < 20);
	igt_spin_free(fd, spin);
	igt_assert_eq(intel_detect_and_clear_missed_interrupts(fd), 0);

	gem_context_destroy(fd, ctx[1]);
	gem_context_destroy(fd, ctx[0]);

	igt_info("%s: %'lu cycles: %.3fus\n",
		 e->name, count, elapsed(&start, &now)*1e6 / count);
}

igt_main
{
	const struct intel_execution_engine2 *e;
	uint32_t handle = 0;
	int device = -1;

	igt_fixture {
		const uint32_t bbe = MI_BATCH_BUFFER_END;

		device = drm_open_driver(DRIVER_INTEL);
		igt_require_gem(device);
		gem_submission_print_method(device);
		gem_scheduler_print_capability(device);

		/* The shared nop batch used by most subtests. */
		handle = gem_create(device, 4096);
		gem_write(device, handle, 0, &bbe, sizeof(bbe));

		igt_fork_hang_detector(device);
	}

	/* Short (2s) sanity variants of the main measurements. */
	igt_subtest("basic-series")
		series(device, handle, 2);

	igt_subtest("basic-parallel")
		parallel(device, handle, 2);

	igt_subtest("basic-sequential")
		sequential(device, handle, 0, 2);

	igt_subtest_with_dynamic("single") {
		__for_each_physical_engine(device, e) {
			igt_dynamic_f("%s", e->name)
				single(device, handle, e);
		}
	}

	igt_subtest_with_dynamic("signal") {
		__for_each_physical_engine(device, e) {
			igt_dynamic_f("%s", e->name)
				fence_signal(device, handle, e,
					     e->name, 2);
		}
	}

	igt_subtest("signal-all")
		/* NULL value means all engines */
		fence_signal(device, handle, NULL, "all", 20);

	igt_subtest("series")
		series(device, handle, 20);

	igt_subtest("parallel")
		parallel(device, handle, 20);

	igt_subtest("independent")
		independent(device, handle, 20);

	igt_subtest_with_dynamic("multiple") {
		__for_each_physical_engine(device, e) {
			igt_dynamic_f("%s", e->name)
				multiple(device, e, 20);
		}
	}

	igt_subtest("sequential")
		sequential(device, handle, 0, 20);

	igt_subtest("forked-sequential")
		sequential(device, handle, FORKED, 20);

	igt_subtest("context-sequential")
		sequential(device, handle, FORKED | CONTEXT, 20);

	/* Preemption requires a priority-aware scheduler. */
	igt_subtest_group {
		igt_fixture {
			gem_require_contexts(device);
			igt_require(gem_scheduler_has_ctx_priority(device));
			igt_require(gem_scheduler_has_preemption(device));
		}
		igt_subtest_with_dynamic("preempt") {
			__for_each_physical_engine(device, e) {
				igt_dynamic_f("%s", e->name)
					preempt(device, handle, e);
			}
		}
	}

	/* These subtests need DRM master rights. */
	igt_subtest_group {
		igt_fixture {
			igt_device_set_master(device);
		}

		igt_subtest_with_dynamic("poll") {
			__for_each_physical_engine(device, e) {
				/* Requires master for STORE_DWORD on gen4/5 */
				igt_dynamic_f("%s", e->name)
					poll_ring(device, e, 20);
			}
		}

		igt_subtest_with_dynamic("headless") {
			__for_each_physical_engine(device, e) {
				igt_dynamic_f("%s", e->name)
				/* Requires master for changing display modes */
					headless(device, handle, e);
			}
		}

		igt_subtest("poll-sequential")
			poll_sequential(device, "Sequential", 20);

	}

	igt_fixture {
		igt_stop_hang_detector();
		gem_close(device, handle);
		close(device);
	}
}