Commit 4ba0b562 authored by Connor Abbott

unknown1 -> uniform_buffers

Also, document the preload_regs field of shader_meta.
parent 895b4840
......@@ -233,8 +233,8 @@ struct mali_shader_meta {
union {
struct {
u32 uses_uniforms : 1;
u32 unk1 : 31; // = 0x4000000 for vertex, 0x4ac0100 for tiler
u32 uniform_buffer_count : 4;
u32 unk1 : 28; // = 0x800000 for vertex, 0x958020 for tiler
} bifrost1;
struct {
/* 0x200 except MALI_NO_ALPHA_TO_COVERAGE. Mysterious 1
......@@ -272,7 +272,40 @@ struct mali_shader_meta {
union {
struct {
u32 unk3 : 15; // = 0x6000
u32 unk3 : 7;
/* On Bifrost, some system values are preloaded in
* registers R55-R62 by the thread dispatcher prior to
* the start of shader execution. This is a bitfield
* with one entry for each register saying which
* registers need to be preloaded. Right now, the known
* values are:
*
* Vertex/compute:
* - R55 : gl_LocalInvocationID.xy
* - R56 : gl_LocalInvocationID.z + unknown in high 16 bits
* - R57 : gl_WorkGroupID.x
* - R58 : gl_WorkGroupID.y
* - R59 : gl_WorkGroupID.z
* - R60 : gl_GlobalInvocationID.x
* - R61 : gl_GlobalInvocationID.y/gl_VertexID (without base)
* - R62 : gl_GlobalInvocationID.z/gl_InstanceID (without base)
*
* Fragment:
* - R55 : unknown, never seen (but the bit for this is
* always set?)
* - R56 : unknown (bit always unset)
* - R57 : gl_PrimitiveID
* - R58 : gl_FrontFacing in low bit, potentially other stuff
* - R59 : u16 fragment coordinates (used to compute
* gl_FragCoord.xy, together with sample positions)
* - R60 : gl_SampleMask (used in epilog, so pretty
* much always used, but the bit is always 0 -- is
* this just always pushed?)
* - R61 : gl_SampleMaskIn and gl_SampleID, used by
* varying interpolation.
* - R62 : unknown (bit always unset).
*/
u32 preload_regs : 8;
/* In units of 8 bytes or 64 bits, since the
* uniform/const port loads 64 bits at a time.
*/
......@@ -409,16 +442,20 @@ enum mali_fbd_type {
#define FBD_TYPE (1)
#define FBD_MASK (~0x3f)
struct mali_unknown1 {
u8 flags;
/* Yes, this really isn't aligned. Go figure. It points to a buffer of
* size 48 for vertex jobs and 32 for tiler jobs which is part of the
* cmdstream, which so far is always 0.
struct mali_uniform_buffer_meta {
/* This is actually the size minus 1 (MALI_POSITIVE), in units of 16
* bytes. This gives a maximum of 2^14 bytes, which just so happens to
* be the GL minimum-maximum for GL_MAX_UNIFORM_BLOCK_SIZE.
*/
mali_ptr ptr;
/* padding? */
u64 zero : 64 - 8;
} __attribute__((packed));
u64 size : 10;
/* This is missing the bottom 2 bits and top 8 bits. The top 8 bits
* should be 0 for userspace pointers, according to
* https://lwn.net/Articles/718895/. By reusing these bits, we can make
* each entry in the table only 64 bits.
*/
mali_ptr ptr : 64 - 10;
};
/* On Bifrost, these fields are the same between the vertex and tiler payloads.
* They also seem to be the same between Bifrost and Midgard. They're shared in
......@@ -446,8 +483,8 @@ struct mali_vertex_tiler_prefix {
* and hence the real value is one.
*
* Vertex jobs reuse the same job dispatch mechanism as compute jobs,
* effectively doing glDispatchCompute(1, vertex_count, 1) where vertex
* count is the number of vertices.
* effectively doing glDispatchCompute(1, vertex_count, instance_count)
* where vertex count is the number of vertices.
*/
u32 invocation_count;
......@@ -551,7 +588,10 @@ struct mali_vertex_tiler_postfix {
uintptr_t position_varying;
uintptr_t unknown1; /* pointer */
/* An array of mali_uniform_buffer_meta's. The size is given by the
* shader_meta.
*/
uintptr_t uniform_buffers;
/* For reasons I don't quite understand this is a pointer to a pointer.
* That second pointer points to the actual texture descriptor. */
......@@ -579,7 +619,7 @@ struct mali_vertex_tiler_postfix {
mali_ptr framebuffer;
#if UINTPTR_MAX == 0xffffffffffffffff /* 64-bit */
/* most likely padding to make this a multiple of 16 bytes */
/* most likely padding to make this a multiple of 64 bytes */
u64 zero7;
#endif
} __attribute__((packed));
......
......@@ -823,6 +823,63 @@ panwrap_replay_vertex_tiler_prefix(struct mali_vertex_tiler_prefix *p, int job_n
panwrap_log("},\n");
}
/*
 * Log the uniform buffer table found at GPU address pubufs.
 *
 * Two passes are made over the ubufs_count entries of the table:
 *   1. the contents of every referenced buffer are logged as a uint32_t
 *      array initializer named ubuf_<index>_<job_no>;
 *   2. the table of struct mali_uniform_buffer_meta entries itself is
 *      logged as uniform_buffers_<job_no>.
 *
 * pubufs      - GPU address of the mali_uniform_buffer_meta array
 * ubufs_count - number of entries in that array
 * job_no      - job number used to suffix the emitted identifiers
 */
static void
panwrap_replay_uniform_buffers(mali_ptr pubufs, int ubufs_count, int job_no)
{
	struct panwrap_mapped_memory *umem = panwrap_find_mapped_gpu_mem_containing(pubufs);
	struct mali_uniform_buffer_meta *PANWRAP_PTR_VAR(ubufs, umem, pubufs);

	/* Pass 1: dump the contents of each buffer. */
	for (int i = 0; i < ubufs_count; i++) {
		/* The descriptor keeps the address shifted right by two bits. */
		mali_ptr ptr = ubufs[i].ptr << 2;
		struct panwrap_mapped_memory *umem2 = panwrap_find_mapped_gpu_mem_containing(ptr);
		uint32_t *PANWRAP_PTR_VAR(ubuf, umem2, ptr);

		char name[50];
		snprintf(name, sizeof(name), "ubuf_%d", i);

		panwrap_log("uint32_t %s_%d[] = {\n", name, job_no);
		panwrap_indent++;

		/* The size field holds (size - 1) in 16-byte units, so the
		 * bound is inclusive and every row is four 32-bit words.
		 */
		for (int row = 0; row <= ubufs[i].size; row++) {
			const uint32_t *words = &ubuf[4 * row];

			/* Only the first word of a row gets the indented log
			 * call; the rest continue on the same line.
			 */
			panwrap_log("0x%"PRIx32", ", words[0]);
			for (int col = 1; col < 4; col++) {
				panwrap_log_cont("0x%"PRIx32", ", words[col]);
			}
			panwrap_log_cont("\n");
		}

		panwrap_indent--;
		panwrap_log("};\n");

		/* The blob uses ubuf 0 to upload internal stuff and uniforms
		 * that won't fit/are accessed indirectly, so it puts it in the
		 * batchbuffer. That is why only index 0 sets the final
		 * TOUCH_LEN argument (presumably the "dynamic" flag -- confirm
		 * against the macro definition).
		 */
		TOUCH_LEN(umem2, ptr, 16 * (ubufs[i].size + 1), name, job_no, i == 0);
	}

	/* Pass 2: dump the descriptor table, referring back to the arrays
	 * emitted above by name.
	 */
	panwrap_log("struct mali_uniform_buffer_meta uniform_buffers_%d[] = {\n",
		    job_no);
	panwrap_indent++;

	for (int i = 0; i < ubufs_count; i++) {
		panwrap_log("{\n");
		panwrap_indent++;

		panwrap_prop("size = MALI_POSITIVE(%d)", ubufs[i].size + 1);
		panwrap_prop("ptr = ubuf_%d_%d_p >> 2", i, job_no);

		panwrap_indent--;
		panwrap_log("},\n");
	}

	panwrap_indent--;
	panwrap_log("};\n");

	TOUCH_LEN(umem, pubufs, sizeof(struct mali_uniform_buffer_meta) * ubufs_count, "uniform_buffers", job_no, true);
}
static void
panwrap_replay_scratchpad(uintptr_t pscratchpad, int job_no, char *suffix)
{
......@@ -864,7 +921,7 @@ panwrap_replay_vertex_tiler_postfix_pre(const struct mali_vertex_tiler_postfix *
else
panwrap_replay_sfbd((u64) (uintptr_t) p->framebuffer, job_no);
int varying_count = 0, attribute_count = 0, uniform_count = 0;
int varying_count = 0, attribute_count = 0, uniform_count = 0, uniform_buffer_count = 0;
if (shader_meta_ptr) {
struct panwrap_mapped_memory *smem = panwrap_find_mapped_gpu_mem_containing(shader_meta_ptr);
......@@ -876,10 +933,14 @@ panwrap_replay_vertex_tiler_postfix_pre(const struct mali_vertex_tiler_postfix *
/* Save for dumps */
attribute_count = s->attribute_count;
varying_count = s->varying_count;
if (is_bifrost)
if (is_bifrost) {
uniform_count = s->bifrost2.uniform_count;
else
uniform_buffer_count = s->bifrost1.uniform_buffer_count;
} else {
uniform_count = s->midgard1.uniform_count;
/* TODO figure this out */
uniform_buffer_count = 1;
}
mali_ptr shader_ptr = panwrap_replay_shader_address("shader", s->shader);
......@@ -892,7 +953,7 @@ panwrap_replay_vertex_tiler_postfix_pre(const struct mali_vertex_tiler_postfix *
panwrap_log(".bifrost1 = {\n");
panwrap_indent++;
panwrap_prop("uses_uniforms = %" PRId32, s->bifrost1.uses_uniforms);
panwrap_prop("uniform_buffer_count = %" PRId32, s->bifrost1.uniform_buffer_count);
panwrap_prop("unk1 = 0x%" PRIx32, s->bifrost1.unk1);
panwrap_indent--;
......@@ -959,6 +1020,7 @@ panwrap_replay_vertex_tiler_postfix_pre(const struct mali_vertex_tiler_postfix *
panwrap_indent++;
panwrap_prop("unk3 = 0x%" PRIx32, s->bifrost2.unk3);
panwrap_prop("preload_regs = 0x%" PRIx32, s->bifrost2.preload_regs);
panwrap_prop("uniform_count = %" PRId32, s->bifrost2.uniform_count);
panwrap_prop("unk4 = 0x%" PRIx32, s->bifrost2.unk4);
......@@ -1122,25 +1184,8 @@ panwrap_replay_vertex_tiler_postfix_pre(const struct mali_vertex_tiler_postfix *
TOUCH_LEN(uniform_mem, p->uniforms, sz, "uniforms", job_no, true);
}
if (p->unknown1) {
struct panwrap_mapped_memory *umem = panwrap_find_mapped_gpu_mem_containing(p->unknown1);
if (umem) {
u64 *PANWRAP_PTR_VAR(u, umem, p->unknown1);
mali_ptr ptr = *u >> 8;
uint8_t flags = *u & 0xFF;
/* Points to... a buffer of zeroes in the same region?
* *shrug* Deliberate 0 length so we don't memset
* anything out */
panwrap_log("u32 inner_unknown1_%d = 0; /* XXX */\n", job_no);
TOUCH_LEN(umem, ptr, 0, "inner_unknown1", job_no, true);
panwrap_log("u64 unknown1_%d = ((inner_unknown1_%d_p) << 8) | %d;\n", job_no, job_no, flags);
TOUCH(umem, p->unknown1, u64, "unknown1", job_no, true);
}
if (p->uniform_buffers) {
panwrap_replay_uniform_buffers(p->uniform_buffers, uniform_buffer_count, job_no);
}
if (p->texture_trampoline) {
struct panwrap_mapped_memory *mmem = panwrap_find_mapped_gpu_mem_containing(p->texture_trampoline);
......@@ -1306,8 +1351,8 @@ panwrap_replay_vertex_tiler_postfix(const struct mali_vertex_tiler_postfix *p, i
DYN_MEMORY_PROP(p, job_no, position_varying);
MEMORY_COMMENT(p, position_varying);
DYN_MEMORY_PROP(p, job_no, unknown1);
MEMORY_COMMENT(p, unknown1);
DYN_MEMORY_PROP(p, job_no, uniform_buffers);
MEMORY_COMMENT(p, uniform_buffers);
DYN_MEMORY_PROP(p, job_no, texture_trampoline);
MEMORY_COMMENT(p, texture_trampoline);
DYN_MEMORY_PROP(p, job_no, sampler_descriptor);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment