Commit 008b3623 authored by Zbigniew Kempczyński, committed by Chris Wilson

lib/intel_batchbuffer: Extend intel_bb

As we're moving toward removing libdrm from the rendercopy tests, some
additional code in intel_bb is required, so add new functions
and fix memory issues in the reset path.
Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
parent 9985cf23
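For context, a minimal usage sketch of the no-reloc path this change enables. It is hedged: drm_open_driver() and intel_bb_exec() are pre-existing IGT helpers, not part of this patch, and the batch size is illustrative.

#include "igt.h"
#include "intel_batchbuffer.h"

static void example_no_reloc(void)
{
	int i915 = drm_open_driver(DRIVER_INTEL);
	/* objects receive randomized presumed offsets, avoiding relocations */
	struct intel_bb *ibb = intel_bb_create(i915, 4096);

	intel_bb_out(ibb, MI_BATCH_BUFFER_END);
	intel_bb_ptr_align(ibb, 8);
	intel_bb_exec(ibb, intel_bb_offset(ibb), I915_EXEC_DEFAULT, false);

	igt_assert_eq(intel_bb_sync(ibb), 0);	/* wait on the out-fence */
	intel_bb_destroy(ibb);
	close(i915);
}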
@@ -26,6 +26,7 @@
**************************************************************************/
#include <inttypes.h>
#include <poll.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
@@ -42,6 +43,7 @@
#include "rendercopy.h"
#include "media_fill.h"
#include "ioctl_wrappers.h"
#include "sw_sync.h"
#include "i915/gem_mman.h"
#include "media_spin.h"
#include "gpgpu_fill.h"
@@ -1045,8 +1047,6 @@ void igt_blitter_fast_copy(struct intel_batchbuffer *batch,
intel_batchbuffer_flush(batch);
}
#undef CHECK_RANGE
/**
* igt_get_render_copyfunc:
* @devid: pci device id
@@ -1202,6 +1202,21 @@ static void __reallocate_objects(struct intel_bb *ibb)
}
}
static inline uint64_t __intel_bb_propose_offset(struct intel_bb *ibb)
{
uint64_t offset;
if (ibb->enforce_relocs)
return 0;
/* randomize the address, we try to avoid relocations */
offset = hars_petruska_f54_1_random64(&ibb->prng);
offset += 256 << 10; /* Keep the low 256k clear, for negative deltas */
offset &= ibb->gtt_size - 1;
return offset;
}
/**
* intel_bb_create:
* @i915: drm fd
@@ -1211,22 +1226,26 @@ static void __reallocate_objects(struct intel_bb *ibb)
*
* Pointer to the intel_bb, asserts on failure.
*/
struct intel_bb *intel_bb_create(int i915, uint32_t size)
static struct intel_bb *
__intel_bb_create(int i915, uint32_t size, bool do_relocs)
{
struct intel_bb *ibb = calloc(1, sizeof(*ibb));
uint64_t gtt_size;
uint64_t bb_address;
igt_assert(ibb);
ibb->i915 = i915;
ibb->devid = intel_get_drm_devid(i915);
ibb->gen = intel_gen(ibb->devid);
ibb->enforce_relocs = do_relocs;
ibb->handle = gem_create(i915, size);
ibb->size = size;
ibb->batch = calloc(1, size);
igt_assert(ibb->batch);
ibb->ptr = ibb->batch;
ibb->prng = (uint32_t) to_user_pointer(ibb);
ibb->fence = -1;
gtt_size = gem_aperture_size(i915);
if (!gem_uses_full_ppgtt(i915))
@@ -1236,11 +1255,45 @@ struct intel_bb *intel_bb_create(int i915, uint32_t size)
ibb->gtt_size = gtt_size;
__reallocate_objects(ibb);
intel_bb_add_object(ibb, ibb->handle, 0, false);
bb_address = __intel_bb_propose_offset(ibb);
intel_bb_add_object(ibb, ibb->handle, bb_address, false);
ibb->refcount = 1;
return ibb;
}
/**
* intel_bb_create:
* @i915: drm fd
* @size: size of the batchbuffer
*
* Returns:
*
* Pointer to the intel_bb, asserts on failure.
*/
struct intel_bb *intel_bb_create(int i915, uint32_t size)
{
return __intel_bb_create(i915, size, false);
}
/**
* intel_bb_create_with_relocs:
* @i915: drm fd
* @size: size of the batchbuffer
*
* Disable passing or randomizing addresses. This will lead to relocations
* when objects are not previously pinned.
*
* Returns:
*
* Pointer to the intel_bb, asserts on failure.
*/
struct intel_bb *intel_bb_create_with_relocs(int i915, uint32_t size)
{
return __intel_bb_create(i915, size, true);
}
/*
* tdestroy() calls free function for each node, but we spread tree
* on objects array, so do nothing.
@@ -1250,24 +1303,32 @@ static void __do_nothing(void *node)
(void) node;
}
static void __intel_bb_destroy_objects(struct intel_bb *ibb)
static void __intel_bb_destroy_relocations(struct intel_bb *ibb)
{
uint32_t i;
/* Free relocations */
for (i = 0; i < ibb->num_objects; i++)
for (i = 0; i < ibb->num_objects; i++) {
free(from_user_pointer(ibb->objects[i].relocs_ptr));
ibb->objects[i].relocs_ptr = to_user_pointer(NULL);
ibb->objects[i].relocation_count = 0;
}
free(ibb->objects);
tdestroy(ibb->root, __do_nothing);
ibb->relocs = NULL;
ibb->num_relocs = 0;
ibb->allocated_relocs = 0;
}
static void __intel_bb_destroy_objects(struct intel_bb *ibb)
{
free(ibb->objects);
ibb->objects = NULL;
tdestroy(ibb->root, __do_nothing);
ibb->root = NULL;
ibb->num_objects = 0;
ibb->num_relocs = 0;
ibb->allocated_objects = 0;
ibb->allocated_relocs = 0;
ibb->ptr = ibb->batch;
}
/**
@@ -1280,9 +1341,16 @@ void intel_bb_destroy(struct intel_bb *ibb)
{
igt_assert(ibb);
ibb->refcount--;
igt_assert_f(ibb->refcount == 0, "Trying to destroy referenced bb!");
__intel_bb_destroy_relocations(ibb);
__intel_bb_destroy_objects(ibb);
gem_close(ibb->i915, ibb->handle);
if (ibb->fence >= 0)
close(ibb->fence);
free(ibb);
}
@@ -1291,10 +1359,21 @@ void intel_bb_destroy(struct intel_bb *ibb)
* @ibb: pointer to intel_bb
* @purge_objects_cache: if true destroy internal execobj and relocs + cache
*
* Recreate batch bo.
* Recreate batch bo when there's no additional reference.
*/
void intel_bb_reset(struct intel_bb *ibb, bool purge_objects_cache)
{
uint64_t bb_address;
if (purge_objects_cache && ibb->refcount > 1)
igt_warn("Cannot purge objects cache on bb, refcount > 1!");
/* Someone keeps reference, just exit */
if (ibb->refcount > 1)
return;
__intel_bb_destroy_relocations(ibb);
if (purge_objects_cache) {
__intel_bb_destroy_objects(ibb);
__reallocate_objects(ibb);
@@ -1303,8 +1382,75 @@ void intel_bb_reset(struct intel_bb *ibb, bool purge_objects_cache)
gem_close(ibb->i915, ibb->handle);
ibb->handle = gem_create(ibb->i915, ibb->size);
intel_bb_add_object(ibb, ibb->handle, 0, false);
bb_address = __intel_bb_propose_offset(ibb);
intel_bb_add_object(ibb, ibb->handle, bb_address, false);
ibb->ptr = ibb->batch;
memset(ibb->batch, 0, ibb->size);
}
/*
* intel_bb_sync:
* @ibb: pointer to intel_bb
*
* Waits for bb completion. Returns 0 on success, otherwise errno.
*/
int intel_bb_sync(struct intel_bb *ibb)
{
int ret;
if (ibb->fence < 0)
return 0;
ret = sync_fence_wait(ibb->fence, -1);
if (ret == 0) {
close(ibb->fence);
ibb->fence = -1;
}
return ret;
}
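Because __intel_bb_exec() below merges each execbuf out-fence into ibb->fence, a single intel_bb_sync() waits for everything submitted so far. A sketch under that assumption:

static void exec_twice_then_wait(struct intel_bb *ibb)
{
	/* two asynchronous submissions of the same batch */
	intel_bb_exec(ibb, intel_bb_offset(ibb), I915_EXEC_DEFAULT, false);
	intel_bb_exec(ibb, intel_bb_offset(ibb), I915_EXEC_DEFAULT, false);

	/* waits on the merged fence, then closes it and resets it to -1 */
	igt_assert_eq(intel_bb_sync(ibb), 0);
}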
/*
* intel_bb_print:
* @ibb: pointer to intel_bb
*
* Prints batch to stdout.
*/
void intel_bb_print(struct intel_bb *ibb)
{
igt_info("drm fd: %d, gen: %d, devid: %u, debug: %d\n",
ibb->i915, ibb->gen, ibb->devid, ibb->debug);
igt_info("handle: %u, size: %u, batch: %p, ptr: %p\n",
ibb->handle, ibb->size, ibb->batch, ibb->ptr);
igt_info("prng: %u, gtt_size: %" PRIu64 ", supports 48bit: %d\n",
ibb->prng, ibb->gtt_size, ibb->supports_48b_address);
igt_info("ctx: %u\n", ibb->ctx);
igt_info("root: %p\n", ibb->root);
igt_info("objects: %p, num_objects: %u, allocated obj: %u\n",
ibb->objects, ibb->num_objects, ibb->allocated_objects);
igt_info("relocs: %p, num_relocs: %u, allocated_relocs: %u\n----\n",
ibb->relocs, ibb->num_relocs, ibb->allocated_relocs);
}
/*
* intel_bb_dump:
* @ibb: pointer to intel_bb
* @filename: name of the file the bb is written to
*
* Dump batch bo to file.
*/
void intel_bb_dump(struct intel_bb *ibb, const char *filename)
{
FILE *out;
void *ptr;
ptr = gem_mmap__device_coherent(ibb->i915, ibb->handle, 0, ibb->size,
PROT_READ);
out = fopen(filename, "wb");
igt_assert(out);
fwrite(ptr, ibb->size, 1, out);
fclose(out);
munmap(ptr, ibb->size);
}
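Together the two helpers cover quick state inspection; a sketch, with an illustrative file name:

static void debug_bb(struct intel_bb *ibb)
{
	intel_bb_print(ibb);		/* summarize bb state on stdout */
	intel_bb_dump(ibb, "bb.bin");	/* raw batch for offline decoding */
}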
/**
@@ -1331,8 +1477,7 @@ static int __compare_objects(const void *p1, const void *p2)
* intel_bb_add_object:
* @ibb: pointer to intel_bb
* @handle: which handle to add to objects array
* @offset: presumed offset of the object when I915_EXEC_NO_RELOC flag is
* used in execbuf call
* @offset: presumed offset of the object when no relocation is enforced
* @write: is the handle a render target
*
* Function adds or updates execobj slot in bb objects array and
@@ -1352,6 +1497,7 @@ intel_bb_add_object(struct intel_bb *ibb, uint32_t handle,
i = ibb->num_objects;
object = &ibb->objects[i];
object->handle = handle;
object->offset = offset;
found = tsearch((void *) object, &ibb->root, __compare_objects);
@@ -1360,18 +1506,8 @@ intel_bb_add_object(struct intel_bb *ibb, uint32_t handle,
else
object = *found;
/* Assign address once */
if (object->offset == 0) {
if (offset) {
object->offset = offset;
} else {
/* randomize the address, we try to avoid relocations */
offset = hars_petruska_f54_1_random64(&ibb->prng);
offset &= (ibb->gtt_size - 1);
offset &= ~(4096 - 1);
object->offset = offset;
}
}
if (object->offset == INTEL_BUF_INVALID_ADDRESS)
object->offset = __intel_bb_propose_offset(ibb);
if (write)
object->flags |= EXEC_OBJECT_WRITE;
@@ -1382,6 +1518,23 @@ intel_bb_add_object(struct intel_bb *ibb, uint32_t handle,
return object;
}
static bool intel_bb_object_set_fence(struct intel_bb *ibb, uint32_t handle)
{
struct drm_i915_gem_exec_object2 object = { .handle = handle };
struct drm_i915_gem_exec_object2 **found;
found = tfind((void *) &object, &ibb->root, __compare_objects);
if (!found) {
igt_warn("Trying to set fence on not found handle: %u\n",
handle);
return false;
}
(*found)->flags |= EXEC_OBJECT_NEEDS_FENCE;
return true;
}
/*
* intel_bb_add_reloc:
* @ibb: pointer to intel_bb
@@ -1390,8 +1543,10 @@ intel_bb_add_object(struct intel_bb *ibb, uint32_t handle,
* @write_domain: gem domain bit for the relocation
* @delta: delta value to add to @buffer's gpu address
* @offset: offset within bb to be patched
* @presumed_offset: address of the object in address space, important for
* I915_EXEC_NO_RELOC flag
* @presumed_offset: address of the object in the address space. If -1 is
* passed, the final offset of the object will be randomized (for a no-reloc
* bb) or 0 (for a reloc bb; in that case reloc.presumed_offset will be -1).
* If the address is known it should be passed in @presumed_offset (no-reloc).
*
* Function allocates additional relocation slot in reloc array for a handle.
* It also implicitly adds handle in the objects array if object doesn't
@@ -1426,14 +1581,18 @@ static uint64_t intel_bb_add_reloc(struct intel_bb *ibb,
relocs[i].write_domain = write_domain;
relocs[i].delta = delta;
relocs[i].offset = offset;
relocs[i].presumed_offset = object->offset;
if (ibb->enforce_relocs)
relocs[i].presumed_offset = -1;
else
relocs[i].presumed_offset = object->offset;
igt_debug("add reloc: handle: %u, r/w: 0x%x/0x%x, "
"delta: 0x%" PRIx64 ", "
"offset: 0x%" PRIx64 ", "
"poffset: 0x%" PRIx64 "\n",
"poffset: %p\n",
handle, read_domains, write_domain,
delta, offset, presumed_offset);
delta, offset,
from_user_pointer(relocs[i].presumed_offset));
return object->offset;
}
@@ -1445,8 +1604,10 @@ static uint64_t intel_bb_add_reloc(struct intel_bb *ibb,
* @read_domains: gem domain bits for the relocation
* @write_domain: gem domain bit for the relocation
* @delta: delta value to add to @buffer's gpu address
* @presumed_offset: address of the object in address space, important for
* I915_EXEC_NO_RELOC flag
* @presumed_offset: address of the object in the address space. If -1 is
* passed, the final offset of the object will be randomized (for a no-reloc
* bb) or 0 (for a reloc bb; in that case reloc.presumed_offset will be -1).
* If the address is known it should be passed in @presumed_offset (no-reloc).
* @write: does a handle is a render target
*
* Function prepares relocation (execobj if required + reloc) and emits
@@ -1458,11 +1619,11 @@ static uint64_t intel_bb_add_reloc(struct intel_bb *ibb,
* on instruction (see instruction specification).
*/
uint64_t intel_bb_emit_reloc(struct intel_bb *ibb,
uint32_t handle,
uint32_t read_domains,
uint32_t write_domain,
uint64_t delta,
uint64_t presumed_offset)
uint32_t handle,
uint32_t read_domains,
uint32_t write_domain,
uint64_t delta,
uint64_t presumed_offset)
{
uint64_t address;
@@ -1479,6 +1640,23 @@ uint64_t intel_bb_emit_reloc(struct intel_bb *ibb,
return address;
}
uint64_t intel_bb_emit_reloc_fenced(struct intel_bb *ibb,
uint32_t handle,
uint32_t read_domains,
uint32_t write_domain,
uint64_t delta,
uint64_t presumed_offset)
{
uint64_t address;
address = intel_bb_emit_reloc(ibb, handle, read_domains, write_domain,
delta, presumed_offset);
intel_bb_object_set_fence(ibb, handle);
return address;
}
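The fenced variant only differs by flagging the target object; a hedged example of patching in a destination buffer (dst_handle is illustrative):

static void emit_fenced_dst(struct intel_bb *ibb, uint32_t dst_handle)
{
	/* -1: address unknown, let intel_bb propose (or relocate) one;
	 * the object additionally gets EXEC_OBJECT_NEEDS_FENCE, needed
	 * for tiled surfaces on older hardware */
	intel_bb_emit_reloc_fenced(ibb, dst_handle,
				   I915_GEM_DOMAIN_RENDER,
				   I915_GEM_DOMAIN_RENDER,
				   0, -1);
}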
/**
* intel_bb_offset_reloc:
* @ibb: pointer to intel_bb
@@ -1486,8 +1664,10 @@ uint64_t intel_bb_emit_reloc(struct intel_bb *ibb,
* @read_domains: gem domain bits for the relocation
* @write_domain: gem domain bit for the relocation
* @offset: offset within bb to be patched
* @presumed_offset: address of the object in address space, important for
* I915_EXEC_NO_RELOC flag
* @presumed_offset: address of the object in the address space. If -1 is
* passed, the final offset of the object will be randomized (for a no-reloc
* bb) or 0 (for a reloc bb; in that case reloc.presumed_offset will be -1).
* If the address is known it should be passed in @presumed_offset (no-reloc).
*
* Function prepares relocation (execobj if required + reloc). It is used
* for editing the batchbuffer by modifying structures. It means when we're
@@ -1509,11 +1689,27 @@ uint64_t intel_bb_offset_reloc(struct intel_bb *ibb,
0, offset, presumed_offset);
}
static void intel_bb_dump_execbuf(struct drm_i915_gem_execbuffer2 *execbuf)
uint64_t intel_bb_offset_reloc_with_delta(struct intel_bb *ibb,
uint32_t handle,
uint32_t read_domains,
uint32_t write_domain,
uint32_t delta,
uint32_t offset,
uint64_t presumed_offset)
{
igt_assert(ibb);
return intel_bb_add_reloc(ibb, handle, read_domains, write_domain,
delta, offset, presumed_offset);
}
static void intel_bb_dump_execbuf(struct intel_bb *ibb,
struct drm_i915_gem_execbuffer2 *execbuf)
{
struct drm_i915_gem_exec_object2 *objects;
struct drm_i915_gem_relocation_entry *relocs, *reloc;
int i, j;
uint64_t address;
igt_info("execbuf batch len: %u, start offset: 0x%x, "
"DR1: 0x%x, DR4: 0x%x, "
@@ -1529,26 +1725,33 @@ static void intel_bb_dump_execbuf(struct drm_i915_gem_execbuffer2 *execbuf)
objects = &((struct drm_i915_gem_exec_object2 *)
from_user_pointer(execbuf->buffers_ptr))[i];
relocs = from_user_pointer(objects->relocs_ptr);
address = objects->offset;
if (address != INTEL_BUF_INVALID_ADDRESS)
address = address & (ibb->gtt_size - 1);
igt_info(" [%d] handle: %u, reloc_count: %d, reloc_ptr: %p, "
"align: 0x%llx, offset: 0x%llx, flags: 0x%llx, "
"align: 0x%llx, offset: 0x%" PRIx64 ", flags: 0x%llx, "
"rsvd1: 0x%llx, rsvd2: 0x%llx\n",
i, objects->handle, objects->relocation_count,
relocs,
objects->alignment,
objects->offset, objects->flags,
address,
objects->flags,
objects->rsvd1, objects->rsvd2);
if (objects->relocation_count) {
igt_info("\texecbuf relocs:\n");
for (j = 0; j < objects->relocation_count; j++) {
reloc = &relocs[j];
address = reloc->presumed_offset;
if (address != INTEL_BUF_INVALID_ADDRESS)
address = address & (ibb->gtt_size - 1);
igt_info("\t [%d] target handle: %u, "
"offset: 0x%llx, delta: 0x%x, "
"presumed_offset: 0x%llx, "
"presumed_offset: 0x%" PRIx64 ", "
"read_domains: 0x%x, "
"write_domain: 0x%x\n",
j, reloc->target_handle,
reloc->offset, reloc->delta,
reloc->presumed_offset,
address,
reloc->read_domains,
reloc->write_domain);
}
@@ -1559,7 +1762,7 @@ static void intel_bb_dump_execbuf(struct drm_i915_gem_execbuffer2 *execbuf)
static void print_node(const void *node, VISIT which, int depth)
{
const struct drm_i915_gem_exec_object2 *object =
*(const struct drm_i915_gem_exec_object2 **) node;
*(const struct drm_i915_gem_exec_object2 **) node;
(void) depth;
switch (which) {
@@ -1593,7 +1796,7 @@ int __intel_bb_exec(struct intel_bb *ibb, uint32_t end_offset,
uint32_t ctx, uint64_t flags, bool sync)
{
struct drm_i915_gem_execbuffer2 execbuf;
int ret;
int ret, fence, new_fence;
ibb->objects[0].relocs_ptr = to_user_pointer(ibb->relocs);
ibb->objects[0].relocation_count = ibb->num_relocs;
@@ -1606,17 +1809,34 @@ int __intel_bb_exec(struct intel_bb *ibb, uint32_t end_offset,
execbuf.buffer_count = ibb->num_objects;
execbuf.batch_len = end_offset;
execbuf.rsvd1 = ibb->ctx = ctx;
execbuf.flags = flags | I915_EXEC_BATCH_FIRST;
ret = __gem_execbuf(ibb->i915, &execbuf);
if (ret)
execbuf.flags = flags | I915_EXEC_BATCH_FIRST | I915_EXEC_FENCE_OUT;
if (ibb->enforce_relocs)
execbuf.flags &= ~I915_EXEC_NO_RELOC;
execbuf.rsvd2 = 0;
ret = __gem_execbuf_wr(ibb->i915, &execbuf);
if (ret) {
intel_bb_dump_execbuf(ibb, &execbuf);
return ret;
}
/* Save/merge fences */
fence = execbuf.rsvd2 >> 32;
if (ibb->fence < 0) {
ibb->fence = fence;
} else {
new_fence = sync_fence_merge(ibb->fence, fence);
close(ibb->fence);
close(fence);
ibb->fence = new_fence;
}
if (sync || ibb->debug)
gem_sync(ibb->i915, ibb->handle);
igt_assert(intel_bb_sync(ibb) == 0);
if (ibb->debug) {
intel_bb_dump_execbuf(&execbuf);
intel_bb_dump_execbuf(ibb, &execbuf);
if (intel_bb_debug_tree) {
igt_info("\nTree:\n");
twalk(ibb->root, print_node);
@@ -1672,14 +1892,20 @@ uint64_t intel_bb_get_object_offset(struct intel_bb *ibb, uint32_t handle)
{
struct drm_i915_gem_exec_object2 object = { .handle = handle };
struct drm_i915_gem_exec_object2 **found;
uint64_t address;
igt_assert(ibb);
found = tfind((void *) &object, &ibb->root, __compare_objects);
found = tfind((void *)&object, &ibb->root, __compare_objects);
if (!found)
return 0;
return INTEL_BUF_INVALID_ADDRESS;
address = (*found)->offset;
return (*found)->offset;
if (address == INTEL_BUF_INVALID_ADDRESS)
return address;
return address & (ibb->gtt_size - 1);
}
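The mask against gtt_size - 1 strips the sign-extended high bits of a canonical 48-bit address returned by the kernel. An illustrative check, assuming handle was previously added to the bb:

static void check_offset(struct intel_bb *ibb, uint32_t handle)
{
	uint64_t offset = intel_bb_get_object_offset(ibb, handle);

	/* e.g. canonical 0xffffaaaa00001000 masks down to 0xaaaa00001000
	 * when gtt_size == 1ULL << 48 */
	igt_assert(offset != INTEL_BUF_INVALID_ADDRESS);
}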
/**
@@ -1698,7 +1924,7 @@ bool intel_bb_object_offset_to_buf(struct intel_bb *ibb, struct intel_buf *buf)
igt_assert(ibb);
igt_assert(buf);
found = tfind((void *) &object, &ibb->root, __compare_objects);
found = tfind((void *)&object, &ibb->root, __compare_objects);
if (!found) {
buf->addr.offset = 0;
buf->addr.ctx = 0;
@@ -1706,8 +1932,196 @@ bool intel_bb_object_offset_to_buf(struct intel_bb *ibb, struct intel_buf *buf)
return false;
}
buf->addr.offset = (*found)->offset;
buf->addr.offset = (*found)->offset & (ibb->gtt_size - 1);
buf->addr.ctx = ibb->ctx;
return true;
}
/*
* intel_bb_emit_bbe:
* @ibb: batchbuffer
*
* Outputs MI_BATCH_BUFFER_END and ensures batch is properly aligned.
*/
uint32_t intel_bb_emit_bbe(struct intel_bb *ibb)
{
/* Mark the end of the buffer. */
intel_bb_out(ibb, MI_BATCH_BUFFER_END);
intel_bb_ptr_align(ibb, 8);
return intel_bb_offset(ibb);
}
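Since it returns the post-alignment offset, the result can feed the execbuf batch length directly; a sketch assuming the pre-existing intel_bb_exec() wrapper:

static void finish_and_run(struct intel_bb *ibb)
{
	uint32_t end = intel_bb_emit_bbe(ibb);	/* BBE + 8-byte alignment */

	intel_bb_exec(ibb, end, I915_EXEC_RENDER | I915_EXEC_NO_RELOC, true);
}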
/*
* intel_bb_flush_with_context_ring:
* @ibb: batchbuffer
* @ctx: context id
* @ring: ring
*
* Submits the batch for execution on the @ring engine with the supplied
* hardware context @ctx.
*/
static void intel_bb_flush_with_context_ring(struct intel_bb *ibb,
uint32_t ctx, uint32_t ring)
{
intel_bb_exec_with_context(ibb, intel_bb_offset(ibb), ctx,
ring | I915_EXEC_NO_RELOC,
false);
intel_bb_reset(ibb, false);
}
void intel_bb_flush_render(struct intel_bb *ibb)
{
uint32_t ring = I915_EXEC_RENDER;
intel_bb_flush_with_context_ring(ibb, ibb->ctx, ring);
}
void intel_bb_flush_blit(struct intel_bb *ibb)
{
uint32_t ring = I915_EXEC_DEFAULT;
if (HAS_BLT_RING(ibb->devid))
ring = I915_EXEC_BLT;
intel_bb_flush_with_context_ring(ibb, ibb->ctx, ring);
}
uint32_t intel_bb_copy_data(struct intel_bb *ibb,
const void *data, unsigned int bytes,