/**************************************************************************
 *
 * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include <inttypes.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
33

34
#include "drm.h"
35
#include "drmtest.h"
36 37
#include "intel_batchbuffer.h"
#include "intel_bufmgr.h"
38 39
#include "intel_chipset.h"
#include "intel_reg.h"
40
#include "rendercopy.h"
41
#include "media_fill.h"
42
#include "ioctl_wrappers.h"
Jeff McGee's avatar
Jeff McGee committed
43
#include "media_spin.h"
44
#include "gpgpu_fill.h"
45

46
#include <i915_drm.h>
47

48 49 50
/**
 * SECTION:intel_batchbuffer
 * @short_description: Batchbuffer and blitter support
 * @title: Batch Buffer
 * @include: igt.h
 *
 * This library provides some basic support for batchbuffers and using the
 * blitter engine based upon libdrm. A new batchbuffer is allocated with
 * intel_batchbuffer_alloc() and for simple blitter commands submitted with
 * intel_batchbuffer_flush().
 *
 * It also provides some convenient macros to easily emit commands into
 * batchbuffers. All those macros presume that a pointer to a #intel_batchbuffer
 * structure called batch is in scope. The basic macros are #BEGIN_BATCH,
 * #OUT_BATCH, #OUT_RELOC and #ADVANCE_BATCH.
 *
 * Note that this library's header pulls in the [i-g-t core](igt-gpu-tools-i-g-t-core.html)
 * library as a dependency.
 */

68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122
/**
 * intel_batchbuffer_align:
 * @batch: batchbuffer object
 * @align: alignment in bytes, passed straight to ALIGN()
 *
 * Rounds the current write position of @batch up to a multiple of @align and
 * moves the write pointer there.
 *
 * Returns: The aligned offset, in bytes from the start of @batch's buffer.
 */
uint32_t
intel_batchbuffer_align(struct intel_batchbuffer *batch, uint32_t align)
{
	uint32_t pos = batch->ptr - batch->buffer;
	const uint32_t aligned = ALIGN(pos, align);

	batch->ptr = batch->buffer + aligned;

	return aligned;
}

/**
 * intel_batchbuffer_subdata_alloc:
 * @batch: batchbuffer object
 * @size: number of bytes to allocate
 * @align: alignment in bytes of the start of the allocation
 *
 * Aligns the write position of @batch to @align, asserts that @size bytes
 * still fit into @batch and then reserves them by advancing the write
 * pointer. The reserved bytes are cleared to zero.
 *
 * Returns: Pointer to the zeroed allocation inside @batch's buffer. Use
 * intel_batchbuffer_subdata_offset() to turn it into an offset.
 */
void *
intel_batchbuffer_subdata_alloc(struct intel_batchbuffer *batch, uint32_t size,
				uint32_t align)
{
	void *subdata;

	/* Alignment happens first, so the space check sees the padded position. */
	subdata = batch->buffer + intel_batchbuffer_align(batch, align);

	igt_assert(size <= intel_batchbuffer_space(batch));
	batch->ptr += size;

	memset(subdata, 0, size);
	return subdata;
}

/**
 * intel_batchbuffer_subdata_offset:
 * @batch: batchbuffer object
 * @ptr: pointer into @batch's buffer
 *
 * Returns: Distance in bytes from the start of @batch's buffer to @ptr.
 */
uint32_t
intel_batchbuffer_subdata_offset(struct intel_batchbuffer *batch, void *ptr)
{
	const uint8_t *target = ptr;

	return target - batch->buffer;
}

123 124 125 126 127 128
/**
 * intel_batchbuffer_reset:
 * @batch: batchbuffer object
 *
 * Resets @batch by allocating a new gem buffer object as backing storage.
 */
129 130 131 132 133 134 135 136 137 138 139
void
intel_batchbuffer_reset(struct intel_batchbuffer *batch)
{
	if (batch->bo != NULL) {
		drm_intel_bo_unreference(batch->bo);
		batch->bo = NULL;
	}

	batch->bo = drm_intel_bo_alloc(batch->bufmgr, "batchbuffer",
				       BATCH_SZ, 4096);

140
	memset(batch->buffer, 0, sizeof(batch->buffer));
141
	batch->ctx = NULL;
142

143
	batch->ptr = batch->buffer;
144
	batch->end = NULL;
145 146
}

147
/**
148
 * intel_batchbuffer_alloc:
149 150 151 152 153 154 155 156
 * @bufmgr: libdrm buffer manager
 * @devid: pci device id of the drm device
 *
 * Allocates a new batchbuffer object. @devid must be supplied since libdrm
 * doesn't expose it directly.
 *
 * Returns: The allocated and initialized batchbuffer object.
 */
157
struct intel_batchbuffer *
158
intel_batchbuffer_alloc(drm_intel_bufmgr *bufmgr, uint32_t devid)
159 160 161 162
{
	struct intel_batchbuffer *batch = calloc(sizeof(*batch), 1);

	batch->bufmgr = bufmgr;
163
	batch->devid = devid;
164
	batch->gen = intel_gen(devid);
165 166 167 168 169
	intel_batchbuffer_reset(batch);

	return batch;
}

170
/**
171
 * intel_batchbuffer_free:
172 173 174 175
 * @batch: batchbuffer object
 *
 * Releases all resource of the batchbuffer object @batch.
 */
176 177 178 179 180 181 182 183
void
intel_batchbuffer_free(struct intel_batchbuffer *batch)
{
	drm_intel_bo_unreference(batch->bo);
	batch->bo = NULL;
	free(batch);
}

184 185
#define CMD_POLY_STIPPLE_OFFSET       0x7906

Ben Widawsky's avatar
Ben Widawsky committed
186 187
static unsigned int
flush_on_ring_common(struct intel_batchbuffer *batch, int ring)
188
{
189
	unsigned int used = batch->ptr - batch->buffer;
190 191

	if (used == 0)
Ben Widawsky's avatar
Ben Widawsky committed
192
		return 0;
193

194
	if (IS_GEN5(batch->devid)) {
195 196 197
		/* emit gen5 w/a without batch space checks - we reserve that
		 * already. */
		*(uint32_t *) (batch->ptr) = CMD_POLY_STIPPLE_OFFSET << 16;
198
		batch->ptr += 4;
199
		*(uint32_t *) (batch->ptr) = 0;
200
		batch->ptr += 4;
201 202
	}

203 204 205 206 207 208 209
	/* Round batchbuffer usage to 2 DWORDs. */
	if ((used & 4) == 0) {
		*(uint32_t *) (batch->ptr) = 0; /* noop */
		batch->ptr += 4;
	}

	/* Mark the end of the buffer. */
210
	*(uint32_t *)(batch->ptr) = MI_BATCH_BUFFER_END; /* noop */
211
	batch->ptr += 4;
Ben Widawsky's avatar
Ben Widawsky committed
212 213 214
	return batch->ptr - batch->buffer;
}

215 216 217 218 219 220 221
/**
 * intel_batchbuffer_flush_on_ring:
 * @batch: batchbuffer object
 * @ring: execbuf ring flag
 *
 * Submits the batch for execution on @ring.
 */
Ben Widawsky's avatar
Ben Widawsky committed
222 223 224 225
void
intel_batchbuffer_flush_on_ring(struct intel_batchbuffer *batch, int ring)
{
	unsigned int used = flush_on_ring_common(batch, ring);
226
	drm_intel_context *ctx;
Ben Widawsky's avatar
Ben Widawsky committed
227 228 229

	if (used == 0)
		return;
230

231
	do_or_die(drm_intel_bo_subdata(batch->bo, 0, used, batch->buffer));
232 233 234

	batch->ptr = NULL;

235 236 237 238 239
	/* XXX bad kernel API */
	ctx = batch->ctx;
	if (ring != I915_EXEC_RENDER)
		ctx = NULL;
	do_or_die(drm_intel_gem_bo_context_exec(batch->bo, ctx, used, ring));
240 241 242 243

	intel_batchbuffer_reset(batch);
}

244 245 246 247 248 249 250
/**
 * intel_batchbuffer_set_context:
 * @batch: batchbuffer object
 * @context: libdrm hardware context object
 *
 * Remembers @context in @batch. intel_batchbuffer_flush_on_ring() uses it for
 * submissions to I915_EXEC_RENDER. Note that intel_batchbuffer_reset() clears
 * the stored context again, so it has to be set anew after every flush.
 */
void
intel_batchbuffer_set_context(struct intel_batchbuffer *batch,
			      drm_intel_context *context)
{
	batch->ctx = context;
}

251 252 253 254 255 256 257 258
/**
 * intel_batchbuffer_flush_with_context:
 * @batch: batchbuffer object
 * @context: libdrm hardware context object
 *
 * Submits the batch for execution on the render engine with the supplied
 * hardware context.
 */
Ben Widawsky's avatar
Ben Widawsky committed
259 260 261 262 263 264 265 266 267 268 269
void
intel_batchbuffer_flush_with_context(struct intel_batchbuffer *batch,
				     drm_intel_context *context)
{
	int ret;
	unsigned int used = flush_on_ring_common(batch, I915_EXEC_RENDER);

	if (used == 0)
		return;

	ret = drm_intel_bo_subdata(batch->bo, 0, used, batch->buffer);
Daniel Vetter's avatar
Daniel Vetter committed
270
	igt_assert(ret == 0);
Ben Widawsky's avatar
Ben Widawsky committed
271 272 273 274 275

	batch->ptr = NULL;

	ret = drm_intel_gem_bo_context_exec(batch->bo, context, used,
					    I915_EXEC_RENDER);
Daniel Vetter's avatar
Daniel Vetter committed
276
	igt_assert(ret == 0);
Ben Widawsky's avatar
Ben Widawsky committed
277 278 279 280

	intel_batchbuffer_reset(batch);
}

281 282 283 284 285 286 287
/**
 * intel_batchbuffer_flush:
 * @batch: batchbuffer object
 *
 * Submits the batch for execution on the blitter engine, selecting the right
 * ring depending upon the hardware platform.
 */
288 289 290 291 292 293 294 295 296
void
intel_batchbuffer_flush(struct intel_batchbuffer *batch)
{
	int ring = 0;
	if (HAS_BLT_RING(batch->devid))
		ring = I915_EXEC_BLT;
	intel_batchbuffer_flush_on_ring(batch, ring);
}

297

298 299 300 301 302 303 304 305 306 307 308 309 310 311 312
/**
 * intel_batchbuffer_emit_reloc:
 * @batch: batchbuffer object
 * @buffer: relocation target libdrm buffer object
 * @delta: delta value to add to @buffer's gpu address
 * @read_domains: gem domain bits for the relocation
 * @write_domain: gem domain bit for the relocation
 * @fenced: whether this gpu access requires fences
 *
 * Emits both a libdrm relocation entry pointing at @buffer and the pre-computed
 * DWORD of @batch's presumed gpu address plus the supplied @delta into @batch.
 *
 * Note that @fenced is only relevant if @buffer is actually tiled.
 *
 * This is the only way buffers get added to the validate list.
313 314 315
 */
void
intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch,
316
                             drm_intel_bo *buffer, uint64_t delta,
Daniel Vetter's avatar
Daniel Vetter committed
317 318
			     uint32_t read_domains, uint32_t write_domain,
			     int fenced)
319
{
320
	uint64_t offset;
321 322
	int ret;

323
	if (batch->ptr - batch->buffer > BATCH_SZ)
Daniel Vetter's avatar
Daniel Vetter committed
324 325 326
		igt_info("bad relocation ptr %p map %p offset %d size %d\n",
			 batch->ptr, batch->buffer,
			 (int)(batch->ptr - batch->buffer), BATCH_SZ);
327

Daniel Vetter's avatar
Daniel Vetter committed
328 329 330 331 332 333 334 335
	if (fenced)
		ret = drm_intel_bo_emit_reloc_fence(batch->bo, batch->ptr - batch->buffer,
						    buffer, delta,
						    read_domains, write_domain);
	else
		ret = drm_intel_bo_emit_reloc(batch->bo, batch->ptr - batch->buffer,
					      buffer, delta,
					      read_domains, write_domain);
336 337 338 339 340 341

	offset = buffer->offset64;
	offset += delta;
	intel_batchbuffer_emit_dword(batch, offset);
	if (batch->gen >= 8)
		intel_batchbuffer_emit_dword(batch, offset >> 32);
Daniel Vetter's avatar
Daniel Vetter committed
342
	igt_assert(ret == 0);
343 344
}

345
/**
 * intel_batchbuffer_copy_data:
 * @batch: batchbuffer object
 * @data: pointer to the data to write into the batchbuffer
 * @bytes: number of bytes to write into the batchbuffer
 * @align: value in bytes to which we want to align
 *
 * This transfers the given @data into the batchbuffer at the next position
 * aligned to @align. Note that the length must be DWORD aligned, i.e.
 * multiples of 32bits; this is asserted. The space for the copy is taken with
 * intel_batchbuffer_subdata_alloc(), which asserts that it is available.
 *
 * Returns: Offset of copied data.
 */
uint32_t
intel_batchbuffer_copy_data(struct intel_batchbuffer *batch,
			    const void *data, unsigned int bytes,
			    uint32_t align)
{
	void *dst;

	igt_assert((bytes & 3) == 0);

	dst = intel_batchbuffer_subdata_alloc(batch, bytes, align);
	memcpy(dst, data, bytes);

	return intel_batchbuffer_subdata_offset(batch, dst);
}
372

373 374 375 376 377 378 379 380
/**
 * intel_blt_copy:
 * @batch: batchbuffer object
 * @src_bo: source libdrm buffer object
 * @src_x1: source pixel x-coordination
 * @src_y1: source pixel y-coordination
 * @src_pitch: @src_bo's pitch in bytes
 * @dst_bo: destination libdrm buffer object
381 382
 * @dst_x1: destination pixel x-coordination
 * @dst_y1: destination pixel y-coordination
383 384 385 386 387 388 389 390
 * @dst_pitch: @dst_bo's pitch in bytes
 * @width: width of the copied rectangle
 * @height: height of the copied rectangle
 * @bpp: bits per pixel
 *
 * This emits a 2D copy operation using blitter commands into the supplied batch
 * buffer object.
 */
391
void
392
intel_blt_copy(struct intel_batchbuffer *batch,
393 394 395
	       drm_intel_bo *src_bo, int src_x1, int src_y1, int src_pitch,
	       drm_intel_bo *dst_bo, int dst_x1, int dst_y1, int dst_pitch,
	       int width, int height, int bpp)
396
{
397
	const int gen = batch->gen;
398 399
	uint32_t src_tiling, dst_tiling, swizzle;
	uint32_t cmd_bits = 0;
400
	uint32_t br13_bits;
401

402 403 404 405 406
	igt_assert(bpp*(src_x1 + width) <= 8*src_pitch);
	igt_assert(bpp*(dst_x1 + width) <= 8*dst_pitch);
	igt_assert(src_pitch * (src_y1 + height) <= src_bo->size);
	igt_assert(dst_pitch * (dst_y1 + height) <= dst_bo->size);

407 408 409
	drm_intel_bo_get_tiling(src_bo, &src_tiling, &swizzle);
	drm_intel_bo_get_tiling(dst_bo, &dst_tiling, &swizzle);

410
	if (gen >= 4 && src_tiling != I915_TILING_NONE) {
411 412 413 414
		src_pitch /= 4;
		cmd_bits |= XY_SRC_COPY_BLT_SRC_TILED;
	}

415
	if (gen >= 4 && dst_tiling != I915_TILING_NONE) {
416 417 418 419
		dst_pitch /= 4;
		cmd_bits |= XY_SRC_COPY_BLT_DST_TILED;
	}

420 421 422 423 424 425 426 427 428 429 430 431
#define CHECK_RANGE(x)	((x) >= 0 && (x) < (1 << 15))
	igt_assert(CHECK_RANGE(src_x1) && CHECK_RANGE(src_y1) &&
		   CHECK_RANGE(dst_x1) && CHECK_RANGE(dst_y1) &&
		   CHECK_RANGE(width) && CHECK_RANGE(height) &&
		   CHECK_RANGE(src_x1 + width) &&
		   CHECK_RANGE(src_y1 + height) &&
		   CHECK_RANGE(dst_x1 + width) &&
		   CHECK_RANGE(dst_y1 + height) &&
		   CHECK_RANGE(src_pitch) &&
		   CHECK_RANGE(dst_pitch));
#undef CHECK_RANGE

432 433 434 435 436 437 438 439 440 441 442 443 444
	br13_bits = 0;
	switch (bpp) {
	case 8:
		break;
	case 16:		/* supporting only RGB565, not ARGB1555 */
		br13_bits |= 1 << 24;
		break;
	case 32:
		br13_bits |= 3 << 24;
		cmd_bits |= XY_SRC_COPY_BLT_WRITE_ALPHA |
			    XY_SRC_COPY_BLT_WRITE_RGB;
		break;
	default:
445
		igt_fail(IGT_EXIT_FAILURE);
446 447
	}

448
	BLIT_COPY_BATCH_START(cmd_bits);
449
	OUT_BATCH((br13_bits) |
450 451
		  (0xcc << 16) | /* copy ROP */
		  dst_pitch);
452 453
	OUT_BATCH((dst_y1 << 16) | dst_x1); /* dst x1,y1 */
	OUT_BATCH(((dst_y1 + height) << 16) | (dst_x1 + width)); /* dst x2,y2 */
454
	OUT_RELOC_FENCED(dst_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
455
	OUT_BATCH((src_y1 << 16) | src_x1); /* src x1,y1 */
456
	OUT_BATCH(src_pitch);
457
	OUT_RELOC_FENCED(src_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
458 459
	ADVANCE_BATCH();

460 461
#define CMD_POLY_STIPPLE_OFFSET       0x7906
	if (gen == 5) {
462
		BEGIN_BATCH(2, 0);
463 464
		OUT_BATCH(CMD_POLY_STIPPLE_OFFSET << 16);
		OUT_BATCH(0);
465
		ADVANCE_BATCH();
466 467 468
	}

	if (gen >= 6 && src_bo == dst_bo) {
469
		BEGIN_BATCH(3, 0);
470 471 472 473 474 475
		OUT_BATCH(XY_SETUP_CLIP_BLT_CMD);
		OUT_BATCH(0);
		OUT_BATCH(0);
		ADVANCE_BATCH();
	}

476 477
	intel_batchbuffer_flush(batch);
}
478

479 480 481 482 483
/**
 * intel_copy_bo:
 * @batch: batchbuffer object
 * @src_bo: source libdrm buffer object
 * @dst_bo: destination libdrm buffer object
484
 * @size: size of the copy range in bytes
485 486
 *
 * This emits a copy operation using blitter commands into the supplied batch
487 488
 * buffer object. A total of @size bytes from the start of @src_bo is copied
 * over to @dst_bo. Note that @size must be page-aligned.
489
 */
490 491 492
void
intel_copy_bo(struct intel_batchbuffer *batch,
	      drm_intel_bo *dst_bo, drm_intel_bo *src_bo,
493
	      long int size)
494
{
Daniel Vetter's avatar
Daniel Vetter committed
495
	igt_assert(size % 4096 == 0);
496

497
	intel_blt_copy(batch,
498 499 500
		       src_bo, 0, 0, 4096,
		       dst_bo, 0, 0, 4096,
		       4096/4, size/4096, 32);
501
}
502

503 504 505 506
/**
 * igt_buf_width:
 * @buf: the i-g-t buffer object
 *
507
 * Computes the width in 32-bit pixels of the given buffer.
508 509 510 511
 *
 * Returns:
 * The width of the buffer.
 */
Ville Syrjälä's avatar
Ville Syrjälä committed
512
unsigned igt_buf_width(const struct igt_buf *buf)
513
{
514
	return buf->stride/(buf->bpp / 8);
515 516
}

517 518 519 520 521 522 523 524 525
/**
 * igt_buf_height:
 * @buf: the i-g-t buffer object
 *
 * Computes the height in 32-bit pixels of the given buffer.
 *
 * Returns:
 * The height of the buffer.
 */
Ville Syrjälä's avatar
Ville Syrjälä committed
526
unsigned igt_buf_height(const struct igt_buf *buf)
527 528 529 530
{
	return buf->size/buf->stride;
}

531 532 533 534
/*
 * pitches are in bytes if the surfaces are linear, number of dwords
 * otherwise
 */
535
static uint32_t fast_copy_pitch(unsigned int stride, unsigned int tiling)
536
{
537 538
	if (tiling != I915_TILING_NONE)
		return stride / 4;
539
	else
540
		return stride;
541 542
}

543 544
static uint32_t fast_copy_dword0(unsigned int src_tiling,
				 unsigned int dst_tiling)
545
{
546
	uint32_t dword0 = 0;
547 548 549

	dword0 |= XY_FAST_COPY_BLT;

550
	switch (src_tiling) {
551 552 553 554 555 556 557 558 559 560 561 562 563 564 565
	case I915_TILING_X:
		dword0 |= XY_FAST_COPY_SRC_TILING_X;
		break;
	case I915_TILING_Y:
	case I915_TILING_Yf:
		dword0 |= XY_FAST_COPY_SRC_TILING_Yb_Yf;
		break;
	case I915_TILING_Ys:
		dword0 |= XY_FAST_COPY_SRC_TILING_Ys;
		break;
	case I915_TILING_NONE:
	default:
		break;
	}

566
	switch (dst_tiling) {
567 568 569 570 571 572 573 574 575 576 577 578 579 580 581
	case I915_TILING_X:
		dword0 |= XY_FAST_COPY_DST_TILING_X;
		break;
	case I915_TILING_Y:
	case I915_TILING_Yf:
		dword0 |= XY_FAST_COPY_DST_TILING_Yb_Yf;
		break;
	case I915_TILING_Ys:
		dword0 |= XY_FAST_COPY_DST_TILING_Ys;
		break;
	case I915_TILING_NONE:
	default:
		break;
	}

582 583 584 585
	return dword0;
}

static uint32_t fast_copy_dword1(unsigned int src_tiling,
586 587
				 unsigned int dst_tiling,
				 int bpp)
588 589 590 591
{
	uint32_t dword1 = 0;

	if (src_tiling == I915_TILING_Yf)
592
		dword1 |= XY_FAST_COPY_SRC_TILING_Yf;
593
	if (dst_tiling == I915_TILING_Yf)
594 595
		dword1 |= XY_FAST_COPY_DST_TILING_Yf;

596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614
	switch (bpp) {
	case 8:
		dword1 |= XY_FAST_COPY_COLOR_DEPTH_8;
		break;
	case 16:
		dword1 |= XY_FAST_COPY_COLOR_DEPTH_16;
		break;
	case 32:
		dword1 |= XY_FAST_COPY_COLOR_DEPTH_32;
		break;
	case 64:
		dword1 |= XY_FAST_COPY_COLOR_DEPTH_64;
		break;
	case 128:
		dword1 |= XY_FAST_COPY_COLOR_DEPTH_128;
		break;
	default:
		igt_assert(0);
	}
615

616 617 618
	return dword1;
}

619 620
static void
fill_relocation(struct drm_i915_gem_relocation_entry *reloc,
621 622
		uint32_t gem_handle, uint32_t delta, /* in bytes */
		uint32_t offset, /* in dwords */
623 624 625
		uint32_t read_domains, uint32_t write_domains)
{
	reloc->target_handle = gem_handle;
626
	reloc->delta = delta;
627 628 629 630 631 632 633 634 635 636 637 638 639
	reloc->offset = offset * sizeof(uint32_t);
	reloc->presumed_offset = 0;
	reloc->read_domains = read_domains;
	reloc->write_domain = write_domains;
}

/*
 * Initialises @obj for @gem_handle with @count relocation entries taken from
 * @relocs. Every other field of @obj is zeroed.
 */
static void
fill_object(struct drm_i915_gem_exec_object2 *obj, uint32_t gem_handle,
	    struct drm_i915_gem_relocation_entry *relocs, uint32_t count)
{
	memset(obj, 0, sizeof(*obj));

	obj->handle = gem_handle;
	obj->relocs_ptr = to_user_pointer(relocs);
	obj->relocation_count = count;
}

/*
 * Submits @count objects from @objs to the blitter ring (I915_EXEC_BLT) with
 * context id 0. @batch_len is given in dwords and converted to bytes for the
 * execbuffer call.
 */
static void exec_blit(int fd,
		      struct drm_i915_gem_exec_object2 *objs, uint32_t count,
		      uint32_t batch_len /* in dwords */)
{
	struct drm_i915_gem_execbuffer2 exec;

	/*
	 * Start from an all-zero request so that no field can reach the
	 * kernel uninitialised; only the non-zero ones are filled in below.
	 */
	memset(&exec, 0, sizeof(exec));

	exec.buffers_ptr = to_user_pointer(objs);
	exec.buffer_count = count;
	exec.batch_len = batch_len * 4;
	exec.flags = I915_EXEC_BLT;
	i915_execbuffer2_set_context_id(exec, 0);

	gem_execbuf(fd, &exec);
}

/**
 * igt_blitter_fast_copy__raw:
 * @fd: file descriptor of the i915 driver
 * @src_handle: GEM handle of the source buffer
667
 * @src_delta: offset into the source GEM bo, in bytes
668 669 670 671 672 673
 * @src_stride: Stride (in bytes) of the source buffer
 * @src_tiling: Tiling mode of the source buffer
 * @src_x: X coordinate of the source region to copy
 * @src_y: Y coordinate of the source region to copy
 * @width: Width of the region to copy
 * @height: Height of the region to copy
674
 * @bpp: source and destination bits per pixel
675 676
 * @dst_handle: GEM handle of the destination buffer
 * @dst_delta: offset into the destination GEM bo, in bytes
677 678 679 680 681 682 683 684 685 686
 * @dst_stride: Stride (in bytes) of the destination buffer
 * @dst_tiling: Tiling mode of the destination buffer
 * @dst_x: X coordinate of destination
 * @dst_y: Y coordinate of destination
 *
 * Like igt_blitter_fast_copy(), but talking to the kernel directly.
 */
void igt_blitter_fast_copy__raw(int fd,
				/* src */
				uint32_t src_handle,
687
				unsigned int src_delta,
688 689 690 691 692 693 694
				unsigned int src_stride,
				unsigned int src_tiling,
				unsigned int src_x, unsigned src_y,

				/* size */
				unsigned int width, unsigned int height,

695 696 697
				/* bpp */
				int bpp,

698 699
				/* dst */
				uint32_t dst_handle,
700
				unsigned dst_delta,
701 702 703 704 705 706 707 708 709 710 711 712 713 714 715
				unsigned int dst_stride,
				unsigned int dst_tiling,
				unsigned int dst_x, unsigned dst_y)
{
	uint32_t batch[12];
	struct drm_i915_gem_exec_object2 objs[3];
	struct drm_i915_gem_relocation_entry relocs[2];
	uint32_t batch_handle;
	uint32_t dword0, dword1;
	uint32_t src_pitch, dst_pitch;
	int i = 0;

	src_pitch = fast_copy_pitch(src_stride, src_tiling);
	dst_pitch = fast_copy_pitch(dst_stride, dst_tiling);
	dword0 = fast_copy_dword0(src_tiling, dst_tiling);
716
	dword1 = fast_copy_dword1(src_tiling, dst_tiling, bpp);
717 718 719 720 721 722 723 724 725 726 727 728 729 730

#define CHECK_RANGE(x)	((x) >= 0 && (x) < (1 << 15))
	assert(CHECK_RANGE(src_x) && CHECK_RANGE(src_y) &&
	       CHECK_RANGE(dst_x) && CHECK_RANGE(dst_y) &&
	       CHECK_RANGE(width) && CHECK_RANGE(height) &&
	       CHECK_RANGE(src_x + width) && CHECK_RANGE(src_y + height) &&
	       CHECK_RANGE(dst_x + width) && CHECK_RANGE(dst_y + height) &&
	       CHECK_RANGE(src_pitch) && CHECK_RANGE(dst_pitch));
#undef CHECK_RANGE

	batch[i++] = dword0;
	batch[i++] = dword1 | dst_pitch;
	batch[i++] = (dst_y << 16) | dst_x; /* dst x1,y1 */
	batch[i++] = ((dst_y + height) << 16) | (dst_x + width); /* dst x2,y2 */
731
	batch[i++] = dst_delta; /* dst address lower bits */
732 733 734
	batch[i++] = 0;	/* dst address upper bits */
	batch[i++] = (src_y << 16) | src_x; /* src x1,y1 */
	batch[i++] = src_pitch;
735
	batch[i++] = src_delta; /* src address lower bits */
736 737 738 739 740 741 742 743 744
	batch[i++] = 0;	/* src address upper bits */
	batch[i++] = MI_BATCH_BUFFER_END;
	batch[i++] = MI_NOOP;

	igt_assert(i == ARRAY_SIZE(batch));

	batch_handle = gem_create(fd, 4096);
	gem_write(fd, batch_handle, 0, batch, sizeof(batch));

745
	fill_relocation(&relocs[0], dst_handle, dst_delta, 4,
746
			I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
747
	fill_relocation(&relocs[1], src_handle, src_delta, 8, I915_GEM_DOMAIN_RENDER, 0);
748 749 750 751 752 753 754 755 756 757

	fill_object(&objs[0], dst_handle, NULL, 0);
	fill_object(&objs[1], src_handle, NULL, 0);
	fill_object(&objs[2], batch_handle, relocs, 2);

	exec_blit(fd, objs, 3, ARRAY_SIZE(batch));

	gem_close(fd, batch_handle);
}

758 759 760 761
/**
 * igt_blitter_fast_copy:
 * @batch: batchbuffer object
 * @src: source i-g-t buffer object
762
 * @src_delta: offset into the source i-g-t bo
763 764 765 766 767
 * @src_x: source pixel x-coordination
 * @src_y: source pixel y-coordination
 * @width: width of the copied rectangle
 * @height: height of the copied rectangle
 * @dst: destination i-g-t buffer object
768
 * @dst_delta: offset into the destination i-g-t bo
769 770 771
 * @dst_x: destination pixel x-coordination
 * @dst_y: destination pixel y-coordination
 *
772
 * Copy @src into @dst using the gen9 fast copy blitter command.
773 774 775 776
 *
 * The source and destination surfaces cannot overlap.
 */
void igt_blitter_fast_copy(struct intel_batchbuffer *batch,
Ville Syrjälä's avatar
Ville Syrjälä committed
777
			   const struct igt_buf *src, unsigned src_delta,
778
			   unsigned src_x, unsigned src_y,
779
			   unsigned width, unsigned height,
780
			   int bpp,
Ville Syrjälä's avatar
Ville Syrjälä committed
781
			   const struct igt_buf *dst, unsigned dst_delta,
782
			   unsigned dst_x, unsigned dst_y)
783 784 785 786
{
	uint32_t src_pitch, dst_pitch;
	uint32_t dword0, dword1;

787 788
	igt_assert(src->bpp == dst->bpp);

789 790 791
	src_pitch = fast_copy_pitch(src->stride, src->tiling);
	dst_pitch = fast_copy_pitch(dst->stride, src->tiling);
	dword0 = fast_copy_dword0(src->tiling, dst->tiling);
792
	dword1 = fast_copy_dword1(src->tiling, dst->tiling, dst->bpp);
793 794 795 796 797 798 799 800 801 802

#define CHECK_RANGE(x)	((x) >= 0 && (x) < (1 << 15))
	assert(CHECK_RANGE(src_x) && CHECK_RANGE(src_y) &&
	       CHECK_RANGE(dst_x) && CHECK_RANGE(dst_y) &&
	       CHECK_RANGE(width) && CHECK_RANGE(height) &&
	       CHECK_RANGE(src_x + width) && CHECK_RANGE(src_y + height) &&
	       CHECK_RANGE(dst_x + width) && CHECK_RANGE(dst_y + height) &&
	       CHECK_RANGE(src_pitch) && CHECK_RANGE(dst_pitch));
#undef CHECK_RANGE

803 804 805 806 807
	BEGIN_BATCH(10, 2);
	OUT_BATCH(dword0);
	OUT_BATCH(dword1 | dst_pitch);
	OUT_BATCH((dst_y << 16) | dst_x); /* dst x1,y1 */
	OUT_BATCH(((dst_y + height) << 16) | (dst_x + width)); /* dst x2,y2 */
808
	OUT_RELOC(dst->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, dst_delta);
809 810 811
	OUT_BATCH(0);	/* dst address upper bits */
	OUT_BATCH((src_y << 16) | src_x); /* src x1,y1 */
	OUT_BATCH(src_pitch);
812
	OUT_RELOC(src->bo, I915_GEM_DOMAIN_RENDER, 0, src_delta);
813 814 815 816 817 818
	OUT_BATCH(0);	/* src address upper bits */
	ADVANCE_BATCH();

	intel_batchbuffer_flush(batch);
}

819 820 821 822 823 824 825 826 827 828
/**
 * igt_get_render_copyfunc:
 * @devid: pci device id
 *
 * Returns:
 *
 * The platform-specific render copy function pointer for the device
 * specified with @devid. Will return NULL when no render copy function is
 * implemented.
 */
829
igt_render_copyfunc_t igt_get_render_copyfunc(int devid)
830
{
831
	igt_render_copyfunc_t copy = NULL;
832 833 834 835 836

	if (IS_GEN2(devid))
		copy = gen2_render_copyfunc;
	else if (IS_GEN3(devid))
		copy = gen3_render_copyfunc;
837 838
	else if (IS_GEN4(devid) || IS_GEN5(devid))
		copy = gen4_render_copyfunc;
839 840 841 842 843 844
	else if (IS_GEN6(devid))
		copy = gen6_render_copyfunc;
	else if (IS_GEN7(devid))
		copy = gen7_render_copyfunc;
	else if (IS_GEN8(devid))
		copy = gen8_render_copyfunc;
845
	else if (IS_GEN9(devid) || IS_GEN10(devid))
846
		copy = gen9_render_copyfunc;
847 848
	else if (IS_GEN11(devid))
		copy = gen11_render_copyfunc;
849 850 851

	return copy;
}
852

853 854 855 856 857 858 859 860 861
/**
 * igt_get_media_fillfunc:
 * @devid: pci device id
 *
 * Returns:
 *
 * The platform-specific media fill function pointer for the device specified
 * with @devid. Will return NULL when no media fill function is implemented.
 */
862
igt_fillfunc_t igt_get_media_fillfunc(int devid)
863
{
864
	igt_fillfunc_t fill = NULL;
865

866
	if (IS_GEN9(devid) || IS_GEN10(devid) || IS_GEN11(devid))
867
		fill = gen9_media_fillfunc;
868
	else if (IS_GEN8(devid))
869 870 871 872 873 874
		fill = gen8_media_fillfunc;
	else if (IS_GEN7(devid))
		fill = gen7_media_fillfunc;

	return fill;
}
Zhenyu Wang's avatar
Zhenyu Wang committed
875

876 877 878 879 880 881 882 883 884
/**
 * igt_get_media_vme_func:
 * @devid: pci device id
 *
 * Returns:
 *
 * The media vme function pointer for the device specified with @devid. Gen9,
 * gen10 and gen11 devices all get the gen11 implementation. Will return NULL
 * for every other device.
 */
igt_vme_func_t igt_get_media_vme_func(int devid)
{
	if (IS_GEN9(devid) || IS_GEN10(devid) || IS_GEN11(devid))
		return gen11_media_vme_func;

	return NULL;
}
Zhenyu Wang's avatar
Zhenyu Wang committed
885 886 887 888 889 890 891 892 893 894 895 896 897 898 899
/**
 * igt_get_gpgpu_fillfunc:
 * @devid: pci device id
 *
 * Returns:
 *
 * The platform-specific gpgpu fill function pointer for the device specified
 * with @devid. Will return NULL when no gpgpu fill function is implemented.
 */
igt_fillfunc_t igt_get_gpgpu_fillfunc(int devid)
{
	igt_fillfunc_t fill = NULL;

	if (IS_GEN7(devid))
		fill = gen7_gpgpu_fillfunc;
900 901
	else if (IS_BROADWELL(devid))
		fill = gen8_gpgpu_fillfunc;
902
	else if (IS_GEN9(devid) || IS_GEN10(devid))
903
		fill = gen9_gpgpu_fillfunc;
904 905
	else if (IS_GEN11(devid))
		fill = gen11_gpgpu_fillfunc;
Zhenyu Wang's avatar
Zhenyu Wang committed
906 907 908

	return fill;
}
Jeff McGee's avatar
Jeff McGee committed
909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924

/**
 * igt_get_media_spinfunc:
 * @devid: pci device id
 *
 * Returns:
 *
 * The platform-specific media spin function pointer for the device specified
 * with @devid. Will return NULL when no media spin function is implemented.
 */
igt_media_spinfunc_t igt_get_media_spinfunc(int devid)
{
	igt_media_spinfunc_t spin = NULL;

	if (IS_GEN9(devid))
		spin = gen9_media_spinfunc;
925
	else if (IS_GEN8(devid))
Jeff McGee's avatar
Jeff McGee committed
926 927 928 929
		spin = gen8_media_spinfunc;

	return spin;
}