Commit 895b4840 authored by Connor Abbott

panwrap: Decode compute jobs

It turns out that compute jobs reuse the same format as vertex jobs, so
this also lets us decode some previously unknown fields in
mali_vertex_tiler_prefix.
parent 0dddbcdf
......@@ -426,13 +426,50 @@ struct mali_unknown1 {
*/
struct mali_vertex_tiler_prefix {
/* Off by one */
u32 vertex_count;
u32 unk1; // 0x28000000
/* This is a dynamic bitfield containing the following things in this order:
*
* - gl_WorkGroupSize.x
* - gl_WorkGroupSize.y
* - gl_WorkGroupSize.z
* - gl_NumWorkGroups.x
* - gl_NumWorkGroups.y
* - gl_NumWorkGroups.z
*
* The number of bits allocated for each number is based on the *_shift
* fields below. For example, workgroups_y_shift gives the bit that
* gl_NumWorkGroups.y starts at, and workgroups_z_shift gives the bit
* that gl_NumWorkGroups.z starts at (and therefore one after the bit
* that gl_NumWorkGroups.y ends at). The actual value for each gl_*
* value is one more than the stored value, since if any of the values
* are zero, then there would be no invocations (and hence no job). If
* there were 0 bits allocated to a given field, then it must be zero,
* and hence the real value is one.
*
* Vertex jobs reuse the same job dispatch mechanism as compute jobs,
* effectively doing glDispatchCompute(1, vertex_count, 1) where vertex
* count is the number of vertices.
*/
u32 invocation_count;
u32 size_y_shift : 5;
u32 size_z_shift : 5;
u32 workgroups_x_shift : 6;
u32 workgroups_y_shift : 6;
u32 workgroups_z_shift : 6;
/* This is max(workgroups_x_shift, 2) in all the cases I've seen. */
u32 workgroups_x_shift_2 : 4;
u32 draw_mode : 4;
u32 unknown_draw : 22;
/* This is the same as workgroups_x_shift_2 in compute shaders, but
* always 5 for vertex jobs and 6 for tiler jobs. I suspect this has
* something to do with how many quads get put in the same execution
* engine, which is a balance (you don't want to starve the engine, but
* you also want to distribute work evenly).
*/
u32 workgroups_x_shift_3 : 6;
unsigned draw_mode : 4;
unsigned unknown_draw : 28;
u32 zero0;
u32 zero1;
......@@ -486,6 +523,7 @@ struct bifrost_tiler_only {
/* 0x20 */
float line_width;
u32 zero0;
mali_ptr tiler_meta;
u64 zero1, zero2, zero3, zero4, zero5, zero6;
......
......@@ -379,8 +379,8 @@ void panwrap_replay_mfbd_bfr(uint64_t gpu_va, int job_no)
/* Assume that unknown1 and tiler_meta were emitted in the last job for
* now */
panwrap_prop("unknown1 = unknown1_%d", job_no - 1);
panwrap_prop("tiler_meta = tiler_meta_%d", job_no - 1);
panwrap_prop("unknown1 = unknown1_%d_p", job_no - 1);
panwrap_prop("tiler_meta = tiler_meta_%d_p", job_no - 1);
panwrap_prop("width1 = MALI_POSITIVE(%d)", fb->width1 + 1);
panwrap_prop("height1 = MALI_POSITIVE(%d)", fb->height1 + 1);
......@@ -762,16 +762,47 @@ panwrap_replay_indices(uintptr_t pindices, uint32_t index_count, int job_no)
}
}
/*
 * Return bits [lo, hi) of word, right-aligned (bit `lo` of word becomes bit 0
 * of the result).
 *
 * The range is clamped to the 32 bits that actually exist in word, so callers
 * may pass shift values taken straight from a (possibly malformed) descriptor:
 *
 *  - hi > 32 is treated as hi == 32;
 *  - an empty or inverted range (lo >= hi after clamping, which includes every
 *    lo >= 32) yields 0, i.e. "no bits allocated to this field".
 *
 * Every shift performed here is by an amount in [0, 31], so no input can
 * trigger the undefined behavior C assigns to over-wide shifts.
 */
static u32 bits(u32 word, u32 lo, u32 hi)
{
	/* A 32-bit word has no bits at position 32 or above. */
	if (hi > 32)
		hi = 32;

	/* Nothing selected; also guarantees lo <= 31 for the shift below. */
	if (lo >= hi)
		return 0;

	u32 width = hi - lo;

	/* width == 32 implies lo == 0; building the mask would shift by 32. */
	if (width == 32)
		return word;

	/* Unsigned 1 so that a 31-bit wide mask does not overflow a signed int. */
	return (word >> lo) & (((u32) 1 << width) - 1);
}
static void
panwrap_replay_vertex_tiler_prefix(struct mali_vertex_tiler_prefix *p, int job_no)
{
panwrap_log_cont("{\n");
panwrap_indent++;
panwrap_prop("vertex_count = MALI_POSITIVE(%" PRId32 ")", p->vertex_count + 1);
panwrap_prop("unk1 = 0x%" PRIx32, p->unk1);
panwrap_prop("invocation_count = %" PRIx32, p->invocation_count);
panwrap_prop("size_y_shift = %d", p->size_y_shift);
panwrap_prop("size_z_shift = %d", p->size_z_shift);
panwrap_prop("workgroups_x_shift = %d", p->workgroups_x_shift);
panwrap_prop("workgroups_y_shift = %d", p->workgroups_y_shift);
panwrap_prop("workgroups_z_shift = %d", p->workgroups_z_shift);
panwrap_prop("workgroups_x_shift_2 = 0x%" PRIx32, p->workgroups_x_shift_2);
/* Decode invocation_count. See the comment before the definition of
* invocation_count for an explanation.
*/
panwrap_msg("size: (%d, %d, %d)\n",
bits(p->invocation_count, 0, p->size_y_shift) + 1,
bits(p->invocation_count, p->size_y_shift, p->size_z_shift) + 1,
bits(p->invocation_count, p->size_z_shift,
p->workgroups_x_shift) + 1);
panwrap_msg("workgroups: (%d, %d, %d)\n",
bits(p->invocation_count, p->workgroups_x_shift,
p->workgroups_y_shift) + 1,
bits(p->invocation_count, p->workgroups_y_shift,
p->workgroups_z_shift) + 1,
bits(p->invocation_count, p->workgroups_z_shift,
32) + 1);
panwrap_prop("unknown_draw = 0x%" PRIx32, p->unknown_draw);
panwrap_prop("workgroups_x_shift_3 = 0x%" PRIx32, p->workgroups_x_shift_3);
panwrap_prop("draw_mode = %s", panwrap_gl_mode_name(p->draw_mode));
......@@ -930,7 +961,6 @@ panwrap_replay_vertex_tiler_postfix_pre(const struct mali_vertex_tiler_postfix *
panwrap_prop("unk3 = 0x%" PRIx32, s->bifrost2.unk3);
panwrap_prop("uniform_count = %" PRId32, s->bifrost2.uniform_count);
panwrap_prop("unk4 = 0x%" PRIx32, s->bifrost2.unk4);
panwrap_prop("unk3 = 0x%" PRIx32, s->bifrost2.unk3);
panwrap_indent--;
panwrap_log("}\n");
......@@ -1708,6 +1738,7 @@ int panwrap_replay_jc(mali_ptr jc_gpu_va, bool bifrost)
}
case JOB_TYPE_TILER:
case JOB_TYPE_VERTEX:
case JOB_TYPE_COMPUTE:
if (bifrost) {
if (h->job_type == JOB_TYPE_TILER)
payload_size = panwrap_replay_tiler_job_bfr(h, mem, payload_ptr, job_no);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment