Commit 419a154d authored by Rob Clark
Browse files

freedreno: support growable cmdstream buffers



The problem userspace needed to solve is that cmdstream buffer sizes vary
by roughly two orders of magnitude (both for gmem commands and for draw
commands), so the previous practice of allocating worst-case sizes is
quite wasteful.  Previously a submit would be constructed (for example)
like:

  CMD  TARGET  DESCRIPTION
   g0    N     gmem/tiling commands
   b0    Y     binning commands
   d0    Y     draw commands

Which, after the one non-IB-target cmd buffer is inserted into the
kernel controlled ringbuffer, looks like (not to scale):

         b0:           d0:
        +-----+       +-----+
   IB1  | ... |       | ... |
        +-----+       +-----+
         ^             ^
         |             |
         +-----+       +-+---------+
         g0:   |         |         |
        +----+----+----+----+----+----+----
   IB0  | .. | IB | .. | IB | .. | IB | ...
        +----+----+----+----+----+----+----
         ^              tile0     tile1
         |
         +-----------+
  userspace          |
  ~~~~~~~~~~~~~~~~~~~|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  kernel             |
               ----+----+----
   ringbuffer  ... | IB | ...
               ----+----+----

Now, multiple physical cmdstream buffers per fd_ringbuffer are supported,
so this becomes:

  CMD  TARGET  DESCRIPTION
   g0    N
   ...   N     gmem/tiling commands
   gN    N
   b0    Y
   ...   Y     binning commands
   bN    Y
   d0    Y
   ...   Y     draw commands
   dN    Y

Which, after the non-IB-target cmd buffers (g0..gN) are inserted into
the kernel controlled ringbuffer, looks like:

             b0:      b1            d0:      d1
            +-----+  +-----+        +-----+  +-----+
       IB1  | ... |  | ... | ...    | ... |  | ... | ...
            +-----+  +-----+        +-----+  +-----+
             ^        ^              ^        ^
             |        |              |        |
             |        +-+            |  +-----+------+
             +-----+    |            |  |            |
                   |    |         +--+----------+    |
             g0:   |    |         |     |       |    |
            +----+----+----+----+----+----+---+----+----+----
       IB0  | .. | IB | IB | .. | IB | IB |.. | IB | IB |...
            +----+----+----+----+----+----+---+----+----+----
             ^                   tile0         tile1
             | to b0  to b1
             |   |      |          to|d0    to|d1
             |   |      +----+       |      +-+-----------+
             |   |           |       |      |             |
             |   +------+    |       +-+-------------+    |
             |    g1:   |    |         |    |        |    |
             |   +----+----+----+----+----+----+---+----+----+----
       IB0   |   | .. | IB | IB | .. | IB | IB |.. | IB | IB |...
             |   +----+----+----+----+----+----+---+----+----+----
             |    ^                   tileX         tileY
             |    |
             |    +-----------+
             +-----------+    |
      userspace          |    |
      ~~~~~~~~~~~~~~~~~~~|~~~~|~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
      kernel             |    |
                   ----+----+----+----
       ringbuffer  ... | IB | IB | ...
                   ----+----+----+----
Signed-off-by: Rob Clark <robclark@freedesktop.org>
parent d93d697d
......@@ -134,12 +134,14 @@ struct fd_ringmarker {
/* Per-backend operations for an fd_ringbuffer.  The generic
 * fd_ringbuffer_*() entry points dispatch through this table.
 */
struct fd_ringbuffer_funcs {
/* pointer to the start of the ring's current backing buffer; the
 * generic code stores the result in ring->start
 */
void * (*hostptr)(struct fd_ringbuffer *ring);
int (*flush)(struct fd_ringbuffer *ring, uint32_t *last_start);
/* optional: fd_ringbuffer_grow() asserts this is non-NULL before
 * calling it (noted there as unsupported on kgsl)
 */
void (*grow)(struct fd_ringbuffer *ring, uint32_t size);
void (*reset)(struct fd_ringbuffer *ring);
void (*emit_reloc)(struct fd_ringbuffer *ring,
const struct fd_reloc *reloc);
/* emit a reference to cmd buffer 'cmd_idx' of 'target' into 'ring';
 * the uint32_t form returns a size in bytes
 */
void (*emit_reloc_ring)(struct fd_ringbuffer *ring,
struct fd_ringbuffer *target,
uint32_t (*emit_reloc_ring)(struct fd_ringbuffer *ring,
struct fd_ringbuffer *target, uint32_t cmd_idx,
uint32_t submit_offset, uint32_t size);
/* optional: fd_ringbuffer_cmd_count() returns 1 when this is NULL */
uint32_t (*cmd_count)(struct fd_ringbuffer *ring);
void (*destroy)(struct fd_ringbuffer *ring);
};
......
......@@ -45,10 +45,9 @@ fd_ringbuffer_new(struct fd_pipe *pipe, uint32_t size)
if (!ring)
return NULL;
ring->size = size;
ring->pipe = pipe;
ring->start = ring->funcs->hostptr(ring);
ring->end = &(ring->start[size/4]);
ring->end = &(ring->start[ring->size/4]);
ring->cur = ring->last_start = ring->start;
......@@ -87,6 +86,22 @@ int fd_ringbuffer_flush(struct fd_ringbuffer *ring)
return ring->funcs->flush(ring, ring->last_start);
}
/*
 * Move the ring onto a new backing buffer obtained from the backend's
 * grow() hook, and rewind start/end/cur/last_start to that new buffer.
 *
 * The requested size is double the current ring size, unless the ring
 * has already reached the IB size limit, in which case the current size
 * is requested again.
 *
 * NOTE: 'ndwords' is accepted but not consulted when choosing the new
 * size.
 */
void fd_ringbuffer_grow(struct fd_ringbuffer *ring, uint32_t ndwords)
{
	assert(ring->funcs->grow);     /* unsupported on kgsl */

	/* there is an upper bound on IB size, which appears to be 0x100000 */
	if (ring->size < 0x100000)
		ring->size = ring->size * 2;

	ring->funcs->grow(ring, ring->size);

	/* re-point the ring at the buffer the backend just switched to: */
	ring->start = ring->funcs->hostptr(ring);
	ring->end = ring->start + (ring->size / 4);

	/* and start writing from the beginning of it: */
	ring->last_start = ring->start;
	ring->cur = ring->last_start;
}
uint32_t fd_ringbuffer_timestamp(struct fd_ringbuffer *ring)
{
return ring->last_timestamp;
......@@ -108,7 +123,14 @@ void fd_ringbuffer_emit_reloc_ring(struct fd_ringbuffer *ring,
submit_offset = offset_bytes(target->cur, target->ring->start);
size = offset_bytes(end->cur, target->cur);
ring->funcs->emit_reloc_ring(ring, target->ring, submit_offset, size);
ring->funcs->emit_reloc_ring(ring, target->ring, 0, submit_offset, size);
}
/*
 * Number of cmd buffers reported by the ring's backend.  Backends that
 * do not implement the cmd_count() hook are reported as having exactly
 * one.
 */
uint32_t fd_ringbuffer_cmd_count(struct fd_ringbuffer *ring)
{
	if (ring->funcs->cmd_count)
		return ring->funcs->cmd_count(ring);

	/* no hook provided by the backend */
	return 1;
}
uint32_t
......@@ -116,9 +138,7 @@ fd_ringbuffer_emit_reloc_ring_full(struct fd_ringbuffer *ring,
struct fd_ringbuffer *target, uint32_t cmd_idx)
{
uint32_t size = offset_bytes(target->cur, target->start);
assert(cmd_idx == 0);
ring->funcs->emit_reloc_ring(ring, target, 0, size);
return size;
return ring->funcs->emit_reloc_ring(ring, target, cmd_idx, 0, size);
}
struct fd_ringmarker * fd_ringmarker_new(struct fd_ringbuffer *ring)
......
......@@ -56,6 +56,7 @@ void fd_ringbuffer_set_parent(struct fd_ringbuffer *ring,
struct fd_ringbuffer *parent);
void fd_ringbuffer_reset(struct fd_ringbuffer *ring);
int fd_ringbuffer_flush(struct fd_ringbuffer *ring);
void fd_ringbuffer_grow(struct fd_ringbuffer *ring, uint32_t ndwords);
uint32_t fd_ringbuffer_timestamp(struct fd_ringbuffer *ring);
static inline void fd_ringbuffer_emit(struct fd_ringbuffer *ring,
......@@ -77,6 +78,7 @@ struct fd_reloc {
void fd_ringbuffer_reloc(struct fd_ringbuffer *ring, const struct fd_reloc *reloc);
will_be_deprecated void fd_ringbuffer_emit_reloc_ring(struct fd_ringbuffer *ring,
struct fd_ringmarker *target, struct fd_ringmarker *end);
uint32_t fd_ringbuffer_cmd_count(struct fd_ringbuffer *ring);
uint32_t fd_ringbuffer_emit_reloc_ring_full(struct fd_ringbuffer *ring,
struct fd_ringbuffer *target, uint32_t cmd_idx);
......
......@@ -173,12 +173,14 @@ static void kgsl_ringbuffer_emit_reloc(struct fd_ringbuffer *ring,
kgsl_pipe_add_submit(to_kgsl_pipe(ring->pipe), kgsl_bo);
}
static void kgsl_ringbuffer_emit_reloc_ring(struct fd_ringbuffer *ring,
struct fd_ringbuffer *target,
static uint32_t kgsl_ringbuffer_emit_reloc_ring(struct fd_ringbuffer *ring,
struct fd_ringbuffer *target, uint32_t cmd_idx,
uint32_t submit_offset, uint32_t size)
{
struct kgsl_ringbuffer *target_ring = to_kgsl_ringbuffer(target);
assert(cmd_idx == 0);
(*ring->cur++) = target_ring->bo->gpuaddr + submit_offset;
return size;
}
static void kgsl_ringbuffer_destroy(struct fd_ringbuffer *ring)
......@@ -213,6 +215,7 @@ drm_private struct fd_ringbuffer * kgsl_ringbuffer_new(struct fd_pipe *pipe,
ring = &kgsl_ring->base;
ring->funcs = &funcs;
ring->size = size;
kgsl_ring->bo = kgsl_rb_bo_new(to_kgsl_pipe(pipe), size);
if (!kgsl_ring->bo) {
......
......@@ -40,12 +40,16 @@
* a backing bo, and a reloc table.
*/
struct msm_cmd {
/* node in msm_ringbuffer::cmd_list of the owning ring: */
struct list_head list;
/* the ringbuffer this cmd buffer belongs to: */
struct fd_ringbuffer *ring;
/* backing bo; mapped by the hostptr() hook to obtain the ring's
 * CPU-visible start pointer
 */
struct fd_bo *ring_bo;
/* reloc table: */
struct drm_msm_gem_submit_reloc *relocs;
uint32_t nr_relocs, max_relocs;
/* size recorded by get_cmd(); read back by emit_reloc_ring for
 * cmd buffers other than the last one in the list
 */
uint32_t size;
};
struct msm_ringbuffer {
......@@ -75,10 +79,28 @@ struct msm_ringbuffer {
struct msm_cmd **cmds;
uint32_t nr_cmds, max_cmds;
/* current cmd-buffer: */
struct msm_cmd *cmd;
/* List of physical cmdstream buffers (msm_cmd) associated with this
* logical fd_ringbuffer.
*
* Note that this is different from msm_ringbuffer::cmds, which
* shadows msm_ringbuffer::submit::cmds for tracking submit ioctl
* related stuff, is tracked *only* in the parent ringbuffer, and
* only holds "completed" cmd buffers (ie. we already know the
* size) added via get_cmd().
*/
struct list_head cmd_list;
int is_growable;
unsigned cmd_count;
};
/*
 * Downcast a generic fd_ringbuffer pointer to the msm backend's
 * ringbuffer type.  This is a plain pointer cast with no checking.
 * NOTE(review): presumably only valid for rings created by this backend,
 * with the fd_ringbuffer embedded at the start of msm_ringbuffer; confirm.
 */
static inline struct msm_ringbuffer * to_msm_ringbuffer(struct fd_ringbuffer *x)
{
	struct msm_ringbuffer *msm_ring = (struct msm_ringbuffer *)x;

	return msm_ring;
}
#define INIT_SIZE 0x1000
static pthread_mutex_t idx_lock = PTHREAD_MUTEX_INITIALIZER;
drm_private extern pthread_mutex_t table_lock;
......@@ -118,12 +140,15 @@ static void ring_cmd_del(struct msm_cmd *cmd)
{
if (cmd->ring_bo)
ring_bo_del(cmd->ring->pipe->dev, cmd->ring_bo);
list_del(&cmd->list);
to_msm_ringbuffer(cmd->ring)->cmd_count--;
free(cmd->relocs);
free(cmd);
}
static struct msm_cmd * ring_cmd_new(struct fd_ringbuffer *ring, uint32_t size)
{
struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring);
struct msm_cmd *cmd = calloc(1, sizeof(*cmd));
if (!cmd)
......@@ -134,6 +159,9 @@ static struct msm_cmd * ring_cmd_new(struct fd_ringbuffer *ring, uint32_t size)
if (!cmd->ring_bo)
goto fail;
list_addtail(&cmd->list, &msm_ring->cmd_list);
msm_ring->cmd_count++;
return cmd;
fail:
......@@ -158,9 +186,11 @@ static void *grow(void *ptr, uint32_t nr, uint32_t *max, uint32_t sz)
(x)->nr_ ## name ++; \
})
static inline struct msm_ringbuffer * to_msm_ringbuffer(struct fd_ringbuffer *x)
static struct msm_cmd *current_cmd(struct fd_ringbuffer *ring)
{
return (struct msm_ringbuffer *)x;
struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring);
assert(!LIST_IS_EMPTY(&msm_ring->cmd_list));
return LIST_LAST_ENTRY(&msm_ring->cmd_list, struct msm_cmd, list);
}
static uint32_t append_bo(struct fd_ringbuffer *ring, struct fd_bo *bo)
......@@ -248,12 +278,13 @@ static void get_cmd(struct fd_ringbuffer *ring, struct msm_cmd *target_cmd,
cmd->submit_offset = submit_offset;
cmd->size = size;
cmd->pad = 0;
target_cmd->size = size;
}
static void * msm_ringbuffer_hostptr(struct fd_ringbuffer *ring)
{
struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring);
return fd_bo_map(msm_ring->cmd->ring_bo);
return fd_bo_map(current_cmd(ring)->ring_bo);
}
static uint32_t find_next_reloc_idx(struct msm_cmd *msm_cmd,
......@@ -271,6 +302,15 @@ static uint32_t find_next_reloc_idx(struct msm_cmd *msm_cmd,
return i;
}
static void delete_cmds(struct msm_ringbuffer *msm_ring)
{
struct msm_cmd *cmd, *tmp;
LIST_FOR_EACH_ENTRY_SAFE(cmd, tmp, &msm_ring->cmd_list, list) {
ring_cmd_del(cmd);
}
}
static void flush_reset(struct fd_ringbuffer *ring)
{
struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring);
......@@ -288,11 +328,36 @@ static void flush_reset(struct fd_ringbuffer *ring)
target_cmd->nr_relocs = 0;
}
msm_ring->cmd->nr_relocs = 0;
msm_ring->submit.nr_cmds = 0;
msm_ring->submit.nr_bos = 0;
msm_ring->nr_cmds = 0;
msm_ring->nr_bos = 0;
if (msm_ring->is_growable) {
delete_cmds(msm_ring);
} else {
/* in old mode, just reset the # of relocs: */
current_cmd(ring)->nr_relocs = 0;
}
}
/*
 * Hand the ring's current cmd buffer to get_cmd(), describing the span
 * from 'last_start' up to the current write pointer (ring->cur).
 *
 * A ring that has a parent is registered against that parent as an
 * IB-target buffer; a ring without a parent is registered against itself
 * as a regular cmd buffer.
 */
static void finalize_current_cmd(struct fd_ringbuffer *ring, uint32_t *last_start)
{
	/* defaults describe a top-level ring (no parent): */
	struct fd_ringbuffer *owner = ring;
	uint32_t buf_type = MSM_SUBMIT_CMD_BUF;
	uint32_t start_off, nbytes;

	if (ring->parent) {
		owner = ring->parent;
		buf_type = MSM_SUBMIT_CMD_IB_TARGET_BUF;
	}

	/* offset of the span within the buffer, and its length: */
	start_off = offset_bytes(last_start, ring->start);
	nbytes = offset_bytes(ring->cur, last_start);

	get_cmd(owner, current_cmd(ring), start_off, nbytes, buf_type);
}
static void dump_submit(struct msm_ringbuffer *msm_ring)
......@@ -323,13 +388,10 @@ static int msm_ringbuffer_flush(struct fd_ringbuffer *ring, uint32_t *last_start
struct drm_msm_gem_submit req = {
.pipe = to_msm_pipe(ring->pipe)->pipe,
};
uint32_t i, submit_offset, size;
uint32_t i;
int ret;
submit_offset = offset_bytes(last_start, ring->start);
size = offset_bytes(ring->cur, last_start);
get_cmd(ring, msm_ring->cmd, submit_offset, size, MSM_SUBMIT_CMD_BUF);
finalize_current_cmd(ring, last_start);
/* needs to be after get_cmd() as that could create bos/cmds table: */
req.bos = VOID2U64(msm_ring->submit.bos),
......@@ -367,6 +429,13 @@ static int msm_ringbuffer_flush(struct fd_ringbuffer *ring, uint32_t *last_start
return ret;
}
static void msm_ringbuffer_grow(struct fd_ringbuffer *ring, uint32_t size)
{
assert(to_msm_ringbuffer(ring)->is_growable);
finalize_current_cmd(ring, ring->last_start);
ring_cmd_new(ring, size);
}
static void msm_ringbuffer_reset(struct fd_ringbuffer *ring)
{
flush_reset(ring);
......@@ -375,14 +444,14 @@ static void msm_ringbuffer_reset(struct fd_ringbuffer *ring)
static void msm_ringbuffer_emit_reloc(struct fd_ringbuffer *ring,
const struct fd_reloc *r)
{
struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring);
struct fd_ringbuffer *parent = ring->parent ? ring->parent : ring;
struct msm_bo *msm_bo = to_msm_bo(r->bo);
struct drm_msm_gem_submit_reloc *reloc;
uint32_t idx = APPEND(msm_ring->cmd, relocs);
struct msm_cmd *cmd = current_cmd(ring);
uint32_t idx = APPEND(cmd, relocs);
uint32_t addr;
reloc = &msm_ring->cmd->relocs[idx];
reloc = &cmd->relocs[idx];
reloc->reloc_idx = bo2idx(parent, r->bo, r->flags);
reloc->reloc_offset = r->offset;
......@@ -398,26 +467,53 @@ static void msm_ringbuffer_emit_reloc(struct fd_ringbuffer *ring,
(*ring->cur++) = addr | r->or;
}
static void msm_ringbuffer_emit_reloc_ring(struct fd_ringbuffer *ring,
struct fd_ringbuffer *target,
static uint32_t msm_ringbuffer_emit_reloc_ring(struct fd_ringbuffer *ring,
struct fd_ringbuffer *target, uint32_t cmd_idx,
uint32_t submit_offset, uint32_t size)
{
struct msm_cmd *cmd = to_msm_ringbuffer(target)->cmd;
struct msm_cmd *cmd = NULL;
uint32_t idx = 0;
LIST_FOR_EACH_ENTRY(cmd, &to_msm_ringbuffer(target)->cmd_list, list) {
if (idx == cmd_idx)
break;
idx++;
}
get_cmd(ring, cmd, submit_offset, size, MSM_SUBMIT_CMD_IB_TARGET_BUF);
assert(cmd && (idx == cmd_idx));
if (idx < (to_msm_ringbuffer(target)->cmd_count - 1)) {
/* All but the last cmd buffer is fully "baked" (ie. already has
* done get_cmd() to add it to the cmds table). But in this case,
* the size we get is invalid (since it is calculated from the
* last cmd buffer):
*/
size = cmd->size;
} else {
get_cmd(ring, cmd, submit_offset, size, MSM_SUBMIT_CMD_IB_TARGET_BUF);
}
msm_ringbuffer_emit_reloc(ring, &(struct fd_reloc){
.bo = cmd->ring_bo,
.flags = FD_RELOC_READ,
.offset = submit_offset,
});
return size;
}
static uint32_t msm_ringbuffer_cmd_count(struct fd_ringbuffer *ring)
{
return to_msm_ringbuffer(ring)->cmd_count;
}
static void msm_ringbuffer_destroy(struct fd_ringbuffer *ring)
{
struct msm_ringbuffer *msm_ring = to_msm_ringbuffer(ring);
if (msm_ring->cmd)
ring_cmd_del(msm_ring->cmd);
flush_reset(ring);
delete_cmds(msm_ring);
free(msm_ring->submit.cmds);
free(msm_ring->submit.bos);
free(msm_ring->bos);
......@@ -428,9 +524,11 @@ static void msm_ringbuffer_destroy(struct fd_ringbuffer *ring)
/* msm backend implementation of the fd_ringbuffer operations.  Unlike a
 * backend that leaves them unset, this one provides both of the optional
 * hooks (grow and cmd_count).
 */
static const struct fd_ringbuffer_funcs funcs = {
.hostptr = msm_ringbuffer_hostptr,
.flush = msm_ringbuffer_flush,
/* finalizes the current cmd buffer and allocates a new one: */
.grow = msm_ringbuffer_grow,
.reset = msm_ringbuffer_reset,
.emit_reloc = msm_ringbuffer_emit_reloc,
.emit_reloc_ring = msm_ringbuffer_emit_reloc_ring,
/* reports msm_ringbuffer::cmd_count: */
.cmd_count = msm_ringbuffer_cmd_count,
.destroy = msm_ringbuffer_destroy,
};
......@@ -446,15 +544,20 @@ drm_private struct fd_ringbuffer * msm_ringbuffer_new(struct fd_pipe *pipe,
goto fail;
}
if (size == 0) {
assert(pipe->dev->version >= FD_VERSION_UNLIMITED_CMDS);
size = INIT_SIZE;
msm_ring->is_growable = TRUE;
}
list_inithead(&msm_ring->cmd_list);
ring = &msm_ring->base;
ring->funcs = &funcs;
ring->size = size;
ring->pipe = pipe; /* needed in ring_cmd_new() */
msm_ring->cmd = ring_cmd_new(ring, size);
if (!msm_ring->cmd) {
ERROR_MSG("command buffer allocation failed");
goto fail;
}
ring_cmd_new(ring, size);
return ring;
fail:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment