From b0f863938225d12bd3725029309cbb9d73d1bd70 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Mon, 5 Sep 2022 21:24:56 -0400 Subject: [PATCH 01/17] asahi: Assert cache line alignment on Z/S buffers Signed-off-by: Alyssa Rosenzweig Part-of: --- src/asahi/lib/cmdbuf.xml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/asahi/lib/cmdbuf.xml b/src/asahi/lib/cmdbuf.xml index 045d2877089a..5be63d711367 100644 --- a/src/asahi/lib/cmdbuf.xml +++ b/src/asahi/lib/cmdbuf.xml @@ -756,17 +756,17 @@ - - + + - + - - + + - - - + + + -- GitLab From 1400733320414b72765bc02dbc556aa1c95020ff Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Mon, 5 Sep 2022 21:42:20 -0400 Subject: [PATCH 02/17] asahi: Identify ZLS Control word from PowerVR We're into the cr.xml file now, which is the blob that gets passed through the kernel. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/asahi/lib/cmdbuf.xml | 31 ++++++++++++++++++++++--------- src/gallium/drivers/asahi/magic.c | 13 ++++++------- 2 files changed, 28 insertions(+), 16 deletions(-) diff --git a/src/asahi/lib/cmdbuf.xml b/src/asahi/lib/cmdbuf.xml index 5be63d711367..b459a1ec4a38 100644 --- a/src/asahi/lib/cmdbuf.xml +++ b/src/asahi/lib/cmdbuf.xml @@ -736,6 +736,27 @@ + + + + + + + + + + + + + + + + + + + + + @@ -745,15 +766,7 @@ - - - - - + diff --git a/src/gallium/drivers/asahi/magic.c b/src/gallium/drivers/asahi/magic.c index 9683b6bc76a7..767001becbf7 100644 --- a/src/gallium/drivers/asahi/magic.c +++ b/src/gallium/drivers/asahi/magic.c @@ -207,21 +207,20 @@ demo_cmdbuf(uint64_t *buf, size_t size, if (util_format_has_depth(desc)) { depth_buffer = agx_map_surface(zsbuf); - cfg.depth_reload = !should_clear_depth; - cfg.depth_flags |= 0x80000; - if (!should_clear_depth) cfg.depth_flags |= 0x8000; + cfg.zls_control.z_store_enable = true; + cfg.zls_control.z_load_enable = !should_clear_depth; } else { stencil_buffer = agx_map_surface(zsbuf); - cfg.depth_flags |= 0x40000; - if 
(!should_clear_stencil) cfg.depth_flags |= 0x4000; + cfg.zls_control.s_store_enable = true; + cfg.zls_control.s_load_enable = !should_clear_stencil; } if (agx_resource(zsbuf->texture)->separate_stencil) { stencil_buffer = agx_map_surface_resource(zsbuf, agx_resource(zsbuf->texture)->separate_stencil); - cfg.depth_flags |= 0x40000; - if (!should_clear_stencil) cfg.depth_flags |= 0x4000; + cfg.zls_control.s_store_enable = true; + cfg.zls_control.s_load_enable = !should_clear_stencil; } cfg.depth_buffer_if_clearing = depth_buffer; -- GitLab From 4e8a586fd3327f5b0e444a52fb05158b1058ee47 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Mon, 12 Sep 2022 22:22:56 -0400 Subject: [PATCH 03/17] asahi: Identify CDM block types Same enum as PowerVR CDM, annoyingly different from the VDM block types. Split out the stream link / terminate structs (both observed with Metal for copious amounts of compute), in preparation for decoding "properly". Signed-off-by: Alyssa Rosenzweig Part-of: --- src/asahi/lib/cmdbuf.xml | 24 ++++++++++++++++++++---- src/asahi/lib/decode.c | 6 +++--- src/gallium/drivers/asahi/agx_state.c | 2 +- 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/src/asahi/lib/cmdbuf.xml b/src/asahi/lib/cmdbuf.xml index b459a1ec4a38..66aaa381e596 100644 --- a/src/asahi/lib/cmdbuf.xml +++ b/src/asahi/lib/cmdbuf.xml @@ -661,23 +661,29 @@ - + - + + + + + + - + + @@ -685,7 +691,17 @@ - + + + + + + + + + + + diff --git a/src/asahi/lib/decode.c b/src/asahi/lib/decode.c index ce84ea0d6e3f..728f3a72cf02 100644 --- a/src/asahi/lib/decode.c +++ b/src/asahi/lib/decode.c @@ -550,14 +550,14 @@ agxdecode_cmd(const uint8_t *map, uint64_t *link, bool verbose) } case AGX_VDM_BLOCK_TYPE_STREAM_LINK: { - agx_unpack(agxdecode_dump_stream, map, STREAM_LINK, hdr); - DUMP_UNPACKED(STREAM_LINK, hdr, "Stream Link\n"); + agx_unpack(agxdecode_dump_stream, map, VDM_STREAM_LINK, hdr); + DUMP_UNPACKED(VDM_STREAM_LINK, hdr, "Stream Link\n"); *link = hdr.target_lo | (((uint64_t) 
hdr.target_hi) << 32); return STATE_LINK; } case AGX_VDM_BLOCK_TYPE_STREAM_TERMINATE: { - DUMP_CL(STREAM_TERMINATE, map, "Stream Terminate"); + DUMP_CL(VDM_STREAM_TERMINATE, map, "Stream Terminate"); return STATE_DONE; } diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c index c887c8577aea..ffeb4edc346e 100644 --- a/src/gallium/drivers/asahi/agx_state.c +++ b/src/gallium/drivers/asahi/agx_state.c @@ -1792,7 +1792,7 @@ agx_ensure_cmdbuf_has_space(struct agx_batch *batch, size_t space) struct agx_ptr T = agx_pool_alloc_aligned(&batch->pool, size, 256); /* Jump from the old command buffer to the new command buffer */ - agx_pack(batch->encoder_current, STREAM_LINK, cfg) { + agx_pack(batch->encoder_current, VDM_STREAM_LINK, cfg) { cfg.target_lo = T.gpu & BITFIELD_MASK(32); cfg.target_hi = T.gpu >> 32; } -- GitLab From 287a0d4f40aa1cd48901a085e1a2afb2fde79fda Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Mon, 12 Sep 2022 22:34:12 -0400 Subject: [PATCH 04/17] asahi: Decode CDM commands separate from VDM This gets correct handling of CDM stream link/terminate, which are encoded in a slightly different way from VDM. 
Signed-off-by: Alyssa Rosenzweig Part-of: --- src/asahi/lib/decode.c | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/src/asahi/lib/decode.c b/src/asahi/lib/decode.c index 728f3a72cf02..ccfc6ba09476 100644 --- a/src/asahi/lib/decode.c +++ b/src/asahi/lib/decode.c @@ -468,16 +468,42 @@ agxdecode_record(uint64_t va, size_t size, bool verbose) } static unsigned -agxdecode_cmd(const uint8_t *map, uint64_t *link, bool verbose) +agxdecode_cdm(const uint8_t *map, uint64_t *link, bool verbose) { - if (map[0] == 0x02 && map[1] == 0x10 && map[2] == 0x00 && map[3] == 0x00) { - /* XXX: This is a CDM command not a VDM one */ + /* Bits 29-31 contain the block type */ + enum agx_cdm_block_type block_type = (map[3] >> 5); + + switch (block_type) { + case AGX_CDM_BLOCK_TYPE_COMPUTE_KERNEL: { agx_unpack(agxdecode_dump_stream, map, LAUNCH, cmd); agxdecode_stateful(cmd.pipeline, "Pipeline", agxdecode_pipeline, verbose); DUMP_UNPACKED(LAUNCH, cmd, "Launch\n"); return AGX_LAUNCH_LENGTH; } + case AGX_CDM_BLOCK_TYPE_STREAM_LINK: { + agx_unpack(agxdecode_dump_stream, map, CDM_STREAM_LINK, hdr); + DUMP_UNPACKED(CDM_STREAM_LINK, hdr, "Stream Link\n"); + *link = hdr.target_lo | (((uint64_t) hdr.target_hi) << 32); + return STATE_LINK; + } + + case AGX_CDM_BLOCK_TYPE_STREAM_TERMINATE: { + DUMP_CL(CDM_STREAM_TERMINATE, map, "Stream Terminate"); + return STATE_DONE; + } + + default: + fprintf(agxdecode_dump_stream, "Unknown CDM block type: %u\n", + block_type); + hexdump(agxdecode_dump_stream, map, 8, false); + return 8; + } +} + +static unsigned +agxdecode_vdm(const uint8_t *map, uint64_t *link, bool verbose) +{ /* Bits 29-31 contain the block type */ enum agx_vdm_block_type block_type = (map[3] >> 5); @@ -617,7 +643,11 @@ agxdecode_cmdstream(unsigned cmdbuf_handle, unsigned map_handle, bool verbose) } uint64_t *encoder = ((uint64_t *) cmdbuf->ptr.cpu) + 7; - agxdecode_stateful(*encoder, "Encoder", agxdecode_cmd, verbose); + + if 
(cmd.unk_5 == 3) + agxdecode_stateful(*encoder, "Encoder", agxdecode_cdm, verbose); + else + agxdecode_stateful(*encoder, "Encoder", agxdecode_vdm, verbose); if (pip.clear_pipeline_unk) { fprintf(agxdecode_dump_stream, "Unk: %X\n", pip.clear_pipeline_unk); -- GitLab From 58d138334d92c18d183873fd70a0481f4edf7535 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Sun, 11 Sep 2022 12:03:01 -0400 Subject: [PATCH 05/17] asahi: Shuffle IOGPU structs We need the header to be common between gfx and compute, but everything else seems to be different. Shuffle so we can decode compute without any terrible hacks. I don't know the exact layout and don't care: the layout of the fields here is all software defined in macOS, even though the *values* are defined by hardware (or firmware in a few cases). Signed-off-by: Alyssa Rosenzweig Part-of: --- src/asahi/lib/cmdbuf.xml | 196 +++++++++++++++--------------- src/asahi/lib/decode.c | 50 +++----- src/gallium/drivers/asahi/magic.c | 41 +++---- 3 files changed, 129 insertions(+), 158 deletions(-) diff --git a/src/asahi/lib/cmdbuf.xml b/src/asahi/lib/cmdbuf.xml index 66aaa381e596..be3970782d36 100644 --- a/src/asahi/lib/cmdbuf.xml +++ b/src/asahi/lib/cmdbuf.xml @@ -714,33 +714,6 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -773,84 +746,105 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + - - - - + + + + - - - + + + - - - - - - - - - - - - - + + + + + + + + + + + + + + diff --git a/src/asahi/lib/decode.c b/src/asahi/lib/decode.c index ccfc6ba09476..d8e343e26e43 100644 --- a/src/asahi/lib/decode.c +++ b/src/asahi/lib/decode.c @@ -611,26 +611,10 @@ agxdecode_cmdstream(unsigned cmdbuf_handle, unsigned map_handle, bool verbose) /* Print the IOGPU stuff */ agx_unpack(agxdecode_dump_stream, cmdbuf->ptr.cpu, IOGPU_HEADER, 
cmd); DUMP_UNPACKED(IOGPU_HEADER, cmd, "IOGPU Header\n"); - agx_unpack(agxdecode_dump_stream, ((uint32_t *) cmdbuf->ptr.cpu) + 160, - IOGPU_INTERNAL_PIPELINES, pip); - DUMP_CL(IOGPU_INTERNAL_PIPELINES, ((uint32_t *) cmdbuf->ptr.cpu) + 160, "Internal pipelines"); - DUMP_CL(IOGPU_AUX_FRAMEBUFFER, ((uint32_t *) cmdbuf->ptr.cpu) + 228, "Aux Framebuffer"); - - agx_unpack(agxdecode_dump_stream, ((uint32_t *) cmdbuf->ptr.cpu) + 292, - IOGPU_CLEAR_Z_S, clearzs); - DUMP_UNPACKED(IOGPU_CLEAR_Z_S, clearzs, "Clear Z/S"); - - /* Guard against changes */ - uint32_t zeroes[356 - 344] = { 0 }; - assert(memcmp(((uint32_t *) cmdbuf->ptr.cpu) + 344, zeroes, 4 * (356 - 344)) == 0); - - DUMP_CL(IOGPU_MISC, ((uint32_t *) cmdbuf->ptr.cpu) + 356, "Misc"); - - /* Should be unused, we think */ - for (unsigned i = (0x6B0 / 4); i < (cmd.attachment_offset / 4); ++i) { - assert(((uint32_t *) cmdbuf->ptr.cpu)[i] == 0); - } + agx_unpack(agxdecode_dump_stream, ((uint32_t *) cmdbuf->ptr.cpu) + 16, + IOGPU_GRAPHICS, gfx); + DUMP_UNPACKED(IOGPU_GRAPHICS, gfx, "Graphics\n"); DUMP_CL(IOGPU_ATTACHMENT_COUNT, ((uint8_t *) cmdbuf->ptr.cpu + cmd.attachment_offset), "Attachment count"); @@ -642,33 +626,31 @@ agxdecode_cmdstream(unsigned cmdbuf_handle, unsigned map_handle, bool verbose) DUMP_CL(IOGPU_ATTACHMENT, ptr, "Attachment"); } - uint64_t *encoder = ((uint64_t *) cmdbuf->ptr.cpu) + 7; - if (cmd.unk_5 == 3) - agxdecode_stateful(*encoder, "Encoder", agxdecode_cdm, verbose); + agxdecode_stateful(cmd.encoder, "Encoder", agxdecode_cdm, verbose); else - agxdecode_stateful(*encoder, "Encoder", agxdecode_vdm, verbose); + agxdecode_stateful(cmd.encoder, "Encoder", agxdecode_vdm, verbose); - if (pip.clear_pipeline_unk) { - fprintf(agxdecode_dump_stream, "Unk: %X\n", pip.clear_pipeline_unk); - agxdecode_stateful(pip.clear_pipeline, "Clear pipeline", + if (gfx.clear_pipeline_unk) { + fprintf(agxdecode_dump_stream, "Unk: %X\n", gfx.clear_pipeline_unk); + agxdecode_stateful(gfx.clear_pipeline, "Clear pipeline", 
agxdecode_pipeline, verbose); } - if (pip.store_pipeline_unk) { - assert(pip.store_pipeline_unk == 0x4); - agxdecode_stateful(pip.store_pipeline, "Store pipeline", + if (gfx.store_pipeline_unk) { + assert(gfx.store_pipeline_unk == 0x4); + agxdecode_stateful(gfx.store_pipeline, "Store pipeline", agxdecode_pipeline, verbose); } - assert((clearzs.partial_reload_pipeline_unk & 0xF) == 0x4); - if (clearzs.partial_reload_pipeline) { - agxdecode_stateful(clearzs.partial_reload_pipeline, + assert((gfx.partial_reload_pipeline_unk & 0xF) == 0x4); + if (gfx.partial_reload_pipeline) { + agxdecode_stateful(gfx.partial_reload_pipeline, "Partial reload pipeline", agxdecode_pipeline, verbose); } - if (clearzs.partial_store_pipeline) { - agxdecode_stateful(clearzs.partial_store_pipeline, + if (gfx.partial_store_pipeline) { + agxdecode_stateful(gfx.partial_store_pipeline, "Partial store pipeline", agxdecode_pipeline, verbose); } diff --git a/src/gallium/drivers/asahi/magic.c b/src/gallium/drivers/asahi/magic.c index 767001becbf7..6964eea8cea9 100644 --- a/src/gallium/drivers/asahi/magic.c +++ b/src/gallium/drivers/asahi/magic.c @@ -186,7 +186,13 @@ demo_cmdbuf(uint64_t *buf, size_t size, uint64_t depth_buffer = 0; uint64_t stencil_buffer = 0; - agx_pack(map + 160, IOGPU_INTERNAL_PIPELINES, cfg) { + agx_pack(map + 16, IOGPU_GRAPHICS, cfg) { + cfg.opengl_depth_clipping = true; + + cfg.deflake_1 = deflake_1; + cfg.deflake_2 = deflake_2; + cfg.deflake_3 = deflake_buffer; + cfg.clear_pipeline_bind = 0xffff8002 | (clear_pipeline_textures ? 
0x210 : 0); cfg.clear_pipeline = pipeline_clear; @@ -223,9 +229,6 @@ demo_cmdbuf(uint64_t *buf, size_t size, cfg.zls_control.s_load_enable = !should_clear_stencil; } - cfg.depth_buffer_if_clearing = depth_buffer; - cfg.stencil_buffer = stencil_buffer; - /* It's unclear how tile size is conveyed for depth/stencil targets, * which interactions with mipmapping (for example of a 33x33 * depth/stencil attachment) @@ -233,18 +236,17 @@ demo_cmdbuf(uint64_t *buf, size_t size, if (zsbuf->u.tex.level != 0) unreachable("todo: mapping other levels"); - cfg.depth_buffer = depth_buffer; + cfg.depth_buffer_1 = depth_buffer; + cfg.depth_buffer_2 = depth_buffer; + + cfg.stencil_buffer_1 = stencil_buffer; cfg.stencil_buffer_2 = stencil_buffer; } - } - agx_pack(map + 228, IOGPU_AUX_FRAMEBUFFER, cfg) { - cfg.width = framebuffer->width; - cfg.height = framebuffer->height; + cfg.width_1 = framebuffer->width; + cfg.height_1 = framebuffer->height; cfg.pointer = unk_buffer_2; - } - agx_pack(map + 292, IOGPU_CLEAR_Z_S, cfg) { cfg.set_when_reloading_z_or_s_1 = clear_pipeline_textures; if (depth_buffer && !should_clear_depth) { @@ -265,16 +267,14 @@ demo_cmdbuf(uint64_t *buf, size_t size, cfg.partial_store_pipeline_bind = 0x12; cfg.partial_store_pipeline = pipeline_store; - } - agx_pack(map + 356, IOGPU_MISC, cfg) { - cfg.depth_buffer = depth_buffer; - cfg.stencil_buffer = stencil_buffer; + cfg.depth_buffer_3 = depth_buffer; + cfg.stencil_buffer_3 = stencil_buffer; cfg.encoder_id = encoder_id; cfg.unknown_buffer = demo_unk6(pool); - cfg.width = framebuffer->width; - cfg.height = framebuffer->height; - cfg.unk_80 = clear_pipeline_textures ? 0x0 : 0x1; + cfg.width_2 = framebuffer->width; + cfg.height_2 = framebuffer->height; + cfg.unk_352 = clear_pipeline_textures ? 
0x0 : 0x1; } unsigned offset_unk = (484 * 4); @@ -294,11 +294,6 @@ demo_cmdbuf(uint64_t *buf, size_t size, cfg.attachment_length = nr_attachments * AGX_IOGPU_ATTACHMENT_LENGTH; cfg.unknown_offset = offset_unk; cfg.encoder = encoder_ptr; - cfg.opengl_depth_clipping = true; - - cfg.deflake_1 = deflake_1; - cfg.deflake_2 = deflake_2; - cfg.deflake_3 = deflake_buffer; } return total_size; -- GitLab From a9c26df462341d29da827dcd9d4f2d60b72aa625 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Wed, 14 Sep 2022 20:07:54 -0400 Subject: [PATCH 06/17] asahi: Identify IOGPU compute header Much simpler than the graphics one. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/asahi/lib/cmdbuf.xml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/asahi/lib/cmdbuf.xml b/src/asahi/lib/cmdbuf.xml index be3970782d36..c25e9a8f6ed8 100644 --- a/src/asahi/lib/cmdbuf.xml +++ b/src/asahi/lib/cmdbuf.xml @@ -759,6 +759,26 @@ + + + + + + + + + + + + + + + + + + + + -- GitLab From adfd213241869739d8f4691eb91f6736ccb625ec Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Wed, 14 Sep 2022 20:08:23 -0400 Subject: [PATCH 07/17] asahi: Decode IOGPU compute header Signed-off-by: Alyssa Rosenzweig Part-of: --- src/asahi/lib/decode.c | 72 +++++++++++++++++++++++++----------------- 1 file changed, 43 insertions(+), 29 deletions(-) diff --git a/src/asahi/lib/decode.c b/src/asahi/lib/decode.c index d8e343e26e43..a0c3835967c6 100644 --- a/src/asahi/lib/decode.c +++ b/src/asahi/lib/decode.c @@ -595,6 +595,47 @@ agxdecode_vdm(const uint8_t *map, uint64_t *link, bool verbose) } } +static void +agxdecode_cs(uint32_t *cmdbuf, uint64_t encoder, bool verbose) +{ + agx_unpack(agxdecode_dump_stream, cmdbuf + 16, IOGPU_COMPUTE, cs); + DUMP_UNPACKED(IOGPU_COMPUTE, cs, "Compute\n"); + + agxdecode_stateful(encoder, "Encoder", agxdecode_cdm, verbose); +} + +static void +agxdecode_gfx(uint32_t *cmdbuf, uint64_t encoder, bool verbose) +{ + agx_unpack(agxdecode_dump_stream, cmdbuf + 
16, IOGPU_GRAPHICS, gfx); + DUMP_UNPACKED(IOGPU_GRAPHICS, gfx, "Graphics\n"); + + agxdecode_stateful(encoder, "Encoder", agxdecode_vdm, verbose); + + if (gfx.clear_pipeline_unk) { + fprintf(agxdecode_dump_stream, "Unk: %X\n", gfx.clear_pipeline_unk); + agxdecode_stateful(gfx.clear_pipeline, "Clear pipeline", + agxdecode_pipeline, verbose); + } + + if (gfx.store_pipeline_unk) { + assert(gfx.store_pipeline_unk == 0x4); + agxdecode_stateful(gfx.store_pipeline, "Store pipeline", + agxdecode_pipeline, verbose); + } + + assert((gfx.partial_reload_pipeline_unk & 0xF) == 0x4); + if (gfx.partial_reload_pipeline) { + agxdecode_stateful(gfx.partial_reload_pipeline, + "Partial reload pipeline", agxdecode_pipeline, verbose); + } + + if (gfx.partial_store_pipeline) { + agxdecode_stateful(gfx.partial_store_pipeline, + "Partial store pipeline", agxdecode_pipeline, verbose); + } +} + void agxdecode_cmdstream(unsigned cmdbuf_handle, unsigned map_handle, bool verbose) { @@ -612,10 +653,6 @@ agxdecode_cmdstream(unsigned cmdbuf_handle, unsigned map_handle, bool verbose) agx_unpack(agxdecode_dump_stream, cmdbuf->ptr.cpu, IOGPU_HEADER, cmd); DUMP_UNPACKED(IOGPU_HEADER, cmd, "IOGPU Header\n"); - agx_unpack(agxdecode_dump_stream, ((uint32_t *) cmdbuf->ptr.cpu) + 16, - IOGPU_GRAPHICS, gfx); - DUMP_UNPACKED(IOGPU_GRAPHICS, gfx, "Graphics\n"); - DUMP_CL(IOGPU_ATTACHMENT_COUNT, ((uint8_t *) cmdbuf->ptr.cpu + cmd.attachment_offset), "Attachment count"); @@ -627,32 +664,9 @@ agxdecode_cmdstream(unsigned cmdbuf_handle, unsigned map_handle, bool verbose) } if (cmd.unk_5 == 3) - agxdecode_stateful(cmd.encoder, "Encoder", agxdecode_cdm, verbose); + agxdecode_cs((uint32_t *) cmdbuf->ptr.cpu, cmd.encoder, verbose); else - agxdecode_stateful(cmd.encoder, "Encoder", agxdecode_vdm, verbose); - - if (gfx.clear_pipeline_unk) { - fprintf(agxdecode_dump_stream, "Unk: %X\n", gfx.clear_pipeline_unk); - agxdecode_stateful(gfx.clear_pipeline, "Clear pipeline", - agxdecode_pipeline, verbose); - } - - if 
(gfx.store_pipeline_unk) { - assert(gfx.store_pipeline_unk == 0x4); - agxdecode_stateful(gfx.store_pipeline, "Store pipeline", - agxdecode_pipeline, verbose); - } - - assert((gfx.partial_reload_pipeline_unk & 0xF) == 0x4); - if (gfx.partial_reload_pipeline) { - agxdecode_stateful(gfx.partial_reload_pipeline, - "Partial reload pipeline", agxdecode_pipeline, verbose); - } - - if (gfx.partial_store_pipeline) { - agxdecode_stateful(gfx.partial_store_pipeline, - "Partial store pipeline", agxdecode_pipeline, verbose); - } + agxdecode_gfx((uint32_t *) cmdbuf->ptr.cpu, cmd.encoder, verbose); agxdecode_map_read_write(); } -- GitLab From dc05b042abc246a153db510e019613a418022014 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Sat, 20 Aug 2022 13:34:30 -0400 Subject: [PATCH 08/17] asahi: Assert that u_transfer_helper is well-behaved Signed-off-by: Alyssa Rosenzweig Part-of: --- src/gallium/drivers/asahi/agx_pipe.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/gallium/drivers/asahi/agx_pipe.c b/src/gallium/drivers/asahi/agx_pipe.c index 9b59e235b386..c0aa7d84a924 100644 --- a/src/gallium/drivers/asahi/agx_pipe.c +++ b/src/gallium/drivers/asahi/agx_pipe.c @@ -169,6 +169,10 @@ agx_resource_create(struct pipe_screen *screen, nresource->mipmapped = (templ->last_level > 0); nresource->internal_format = nresource->base.format; + assert(templ->format != PIPE_FORMAT_Z24X8_UNORM && + templ->format != PIPE_FORMAT_Z24_UNORM_S8_UINT && + "u_transfer_helper should have lowered"); + nresource->layout = (struct ail_layout) { .tiling = (nresource->modifier == DRM_FORMAT_MOD_LINEAR) ? AIL_TILING_LINEAR : AIL_TILING_TWIDDLED, -- GitLab From 22de011675c14716342a537e7508d5bc78d0d437 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Thu, 18 Aug 2022 22:48:12 -0400 Subject: [PATCH 09/17] asahi: Use the internal format internally Confusingly, after creation rsrc->base.format will contain the external format due to u_transfer_helper quirks. 
For our internal use, we need to look at the internal format, rsrc->layout.format. With the new layout code, the rsrc->internal_format property is redundant, so we delete that to reduce confusion. Fixes dEQP-GLES3.functional.texture.format.sized.2d.depth32f_stencil8_* Signed-off-by: Alyssa Rosenzweig Part-of: --- src/gallium/drivers/asahi/agx_pipe.c | 7 +++---- src/gallium/drivers/asahi/agx_state.h | 3 --- src/gallium/drivers/asahi/magic.c | 8 ++++++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/gallium/drivers/asahi/agx_pipe.c b/src/gallium/drivers/asahi/agx_pipe.c index c0aa7d84a924..105a1443424d 100644 --- a/src/gallium/drivers/asahi/agx_pipe.c +++ b/src/gallium/drivers/asahi/agx_pipe.c @@ -167,7 +167,6 @@ agx_resource_create(struct pipe_screen *screen, nresource->modifier = agx_select_modifier(nresource); nresource->mipmapped = (templ->last_level > 0); - nresource->internal_format = nresource->base.format; assert(templ->format != PIPE_FORMAT_Z24X8_UNORM && templ->format != PIPE_FORMAT_Z24_UNORM_S8_UINT && @@ -286,10 +285,10 @@ agx_transfer_map(struct pipe_context *pctx, if (rsrc->modifier == DRM_FORMAT_MOD_APPLE_TWIDDLED) { transfer->base.stride = - util_format_get_stride(resource->format, box->width); + util_format_get_stride(rsrc->layout.format, box->width); transfer->base.layer_stride = - util_format_get_2d_size(resource->format, transfer->base.stride, + util_format_get_2d_size(rsrc->layout.format, transfer->base.stride, box->height); transfer->map = calloc(transfer->base.layer_stride, box->depth); @@ -1143,7 +1142,7 @@ agx_resource_get_stencil(struct pipe_resource *prsrc) static enum pipe_format agx_resource_get_internal_format(struct pipe_resource *prsrc) { - return agx_resource(prsrc)->internal_format; + return agx_resource(prsrc)->layout.format; } static const struct u_transfer_vtbl transfer_vtbl = { diff --git a/src/gallium/drivers/asahi/agx_state.h b/src/gallium/drivers/asahi/agx_state.h index 862f069f0706..868ef1985df6 100644 --- 
a/src/gallium/drivers/asahi/agx_state.h +++ b/src/gallium/drivers/asahi/agx_state.h @@ -294,9 +294,6 @@ struct agx_resource { * resources. */ struct agx_resource *separate_stencil; - - /* Internal format, since many depth/stencil formats are emulated. */ - enum pipe_format internal_format; }; static inline struct agx_resource * diff --git a/src/gallium/drivers/asahi/magic.c b/src/gallium/drivers/asahi/magic.c index 6964eea8cea9..722a67b2b226 100644 --- a/src/gallium/drivers/asahi/magic.c +++ b/src/gallium/drivers/asahi/magic.c @@ -116,7 +116,7 @@ asahi_pack_iogpu_attachment(void *out, struct agx_resource *rsrc, assert(surf->u.tex.first_layer == surf->u.tex.last_layer); agx_pack(out, IOGPU_ATTACHMENT, cfg) { - cfg.type = asahi_classify_attachment(rsrc->base.format); + cfg.type = asahi_classify_attachment(rsrc->layout.format); cfg.address = agx_map_surface_resource(surf, rsrc); cfg.size = rsrc->layout.size_B; cfg.percent = (100 * cfg.size) / total_size; @@ -205,7 +205,11 @@ demo_cmdbuf(uint64_t *buf, size_t size, if (framebuffer->zsbuf) { struct pipe_surface *zsbuf = framebuffer->zsbuf; const struct util_format_description *desc = - util_format_description(zsbuf->texture->format); + util_format_description(agx_resource(zsbuf->texture)->layout.format); + + assert(desc->format == PIPE_FORMAT_Z32_FLOAT || + desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT || + desc->format == PIPE_FORMAT_S8_UINT); cfg.depth_width = framebuffer->width; cfg.depth_height = framebuffer->height; -- GitLab From 2fbe1ae09c22f5b58ba1c347e87750f17a1265a7 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Thu, 15 Sep 2022 18:19:25 -0400 Subject: [PATCH 10/17] asahi: Identify spill buffer histogram Histogram of sizes of the spill buffer, with logarithmic bucket sizes (relative to the amount spilled from the perspective of a single thread). Pretty funny. Also mark a few unknowns that are nonzero when spilling is used. 
Signed-off-by: Alyssa Rosenzweig Part-of: --- src/asahi/lib/cmdbuf.xml | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/src/asahi/lib/cmdbuf.xml b/src/asahi/lib/cmdbuf.xml index c25e9a8f6ed8..49ffd5b5c720 100644 --- a/src/asahi/lib/cmdbuf.xml +++ b/src/asahi/lib/cmdbuf.xml @@ -682,6 +682,7 @@ + @@ -759,6 +760,24 @@ + + + + + + + + + + + + + + + + + + @@ -773,7 +792,9 @@ + + @@ -821,11 +842,16 @@ + + + + + -- GitLab From 43ed48d1695f2b2a4da7078bedc82670b1303c81 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Sat, 3 Sep 2022 14:03:03 -0400 Subject: [PATCH 11/17] asahi: Simplify IOGPU attachment packing Give bigger ranges, it's simpler and less broken for layered framebuffers. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/gallium/drivers/asahi/magic.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/src/gallium/drivers/asahi/magic.c b/src/gallium/drivers/asahi/magic.c index 722a67b2b226..a5dfd62daa33 100644 --- a/src/gallium/drivers/asahi/magic.c +++ b/src/gallium/drivers/asahi/magic.c @@ -109,15 +109,11 @@ agx_map_surface(struct pipe_surface *surf) static void asahi_pack_iogpu_attachment(void *out, struct agx_resource *rsrc, - struct pipe_surface *surf, unsigned total_size) { - /* We don't support layered rendering yet */ - assert(surf->u.tex.first_layer == surf->u.tex.last_layer); - agx_pack(out, IOGPU_ATTACHMENT, cfg) { cfg.type = asahi_classify_attachment(rsrc->layout.format); - cfg.address = agx_map_surface_resource(surf, rsrc); + cfg.address = rsrc->bo->ptr.gpu; cfg.size = rsrc->layout.size_B; cfg.percent = (100 * cfg.size) / total_size; } @@ -133,7 +129,6 @@ asahi_pack_iogpu_attachments(void *out, struct pipe_framebuffer_state *framebuff for (unsigned i = 0; i < framebuffer->nr_cbufs; ++i) { asahi_pack_iogpu_attachment(attachments + (nr++), agx_resource(framebuffer->cbufs[i]->texture), - framebuffer->cbufs[i], total_attachment_size); } @@ -141,13 +136,11 @@ asahi_pack_iogpu_attachments(void 
*out, struct pipe_framebuffer_state *framebuff struct agx_resource *rsrc = agx_resource(framebuffer->zsbuf->texture); asahi_pack_iogpu_attachment(attachments + (nr++), - rsrc, framebuffer->zsbuf, - total_attachment_size); + rsrc, total_attachment_size); if (rsrc->separate_stencil) { asahi_pack_iogpu_attachment(attachments + (nr++), rsrc->separate_stencil, - framebuffer->zsbuf, total_attachment_size); } } -- GitLab From 09cc736c42465c5b4ba2e970b680048339139dba Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Wed, 14 Sep 2022 22:06:56 -0400 Subject: [PATCH 12/17] asahi: Identify shared memory fields For compute kernels, this encodes how much workgroup-local memory is used ("shared memory" or "threadgroup memory" or "local memory"). This memory is partitioned by the hardware. For fragment shaders, this... encodes exactly the same thing. There is no traditional tilebuffer in AGX, instead local memory is interpreted as an imageblock, where each workgroup is a tile. This is a nifty design. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/asahi/lib/cmdbuf.xml | 8 ++++++-- src/gallium/drivers/asahi/agx_state.c | 6 ++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/asahi/lib/cmdbuf.xml b/src/asahi/lib/cmdbuf.xml index 49ffd5b5c720..2e15582023af 100644 --- a/src/asahi/lib/cmdbuf.xml +++ b/src/asahi/lib/cmdbuf.xml @@ -502,7 +502,9 @@ - + + + - + + + + + + + + - + @@ -480,7 +487,7 @@ - + @@ -488,7 +495,7 @@ - + @@ -501,7 +508,7 @@ - + @@ -536,7 +543,7 @@ - + @@ -558,6 +565,8 @@ + + -- GitLab From 35d5558fa55711819bdc46f1ef3317d82dc80688 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Sat, 17 Sep 2022 11:19:52 -0400 Subject: [PATCH 14/17] asahi/genxml: Overflow up to words when packing So we can pack things that aren't 4-byte sized. Note this doesn't help with alignment. 
Signed-off-by: Alyssa Rosenzweig Part-of: --- src/asahi/lib/gen_pack.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/asahi/lib/gen_pack.py b/src/asahi/lib/gen_pack.py index 430daeba5ba6..a8d839e0e2ce 100644 --- a/src/asahi/lib/gen_pack.py +++ b/src/asahi/lib/gen_pack.py @@ -26,6 +26,7 @@ import xml.parsers.expat import sys import operator +import math from functools import reduce global_prefix = "agx" @@ -369,7 +370,7 @@ class Group(object): elif field.modifier[0] == "log2": print(" assert(util_is_power_of_two_nonzero(values->{}));".format(field.name)) - for index in range(self.length // 4): + for index in range(math.ceil(self.length / 4)): # Handle MBZ words if not index in words: print(" cl[%2d] = 0;" % index) -- GitLab From 933a9e350e154a7264f03d568476885e814130c2 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Sat, 17 Sep 2022 11:22:01 -0400 Subject: [PATCH 15/17] asahi: Overhaul USC control packing Break up the monolithic SET_SHADER_EXTENDED packet into the separate underlying commands (some only 2-byte sized and aligned), and add a builder for USC control streams like we did for PPP updates to make that change manageable. 
Signed-off-by: Alyssa Rosenzweig Part-of: --- src/asahi/lib/cmdbuf.xml | 113 +++++------- src/asahi/lib/decode.c | 136 +++++++------- src/gallium/drivers/asahi/agx_state.c | 254 ++++++++++++++------------ 3 files changed, 247 insertions(+), 256 deletions(-) diff --git a/src/asahi/lib/cmdbuf.xml b/src/asahi/lib/cmdbuf.xml index 9d15ab320c2b..c9b2564cdc45 100644 --- a/src/asahi/lib/cmdbuf.xml +++ b/src/asahi/lib/cmdbuf.xml @@ -471,13 +471,18 @@ + + + + - + + - + @@ -486,7 +491,7 @@ - + @@ -494,7 +499,7 @@ - + @@ -502,69 +507,51 @@ - - - - + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + - diff --git a/src/asahi/lib/decode.c b/src/asahi/lib/decode.c index a0c3835967c6..242a6ea68d99 100644 --- a/src/asahi/lib/decode.c +++ b/src/asahi/lib/decode.c @@ -305,64 +305,63 @@ agxdecode_stateful(uint64_t va, const char *label, decode_cmd decoder, bool verb } } -unsigned COUNTER = 0; static unsigned -agxdecode_pipeline(const uint8_t *map, uint64_t *link, UNUSED bool verbose) +agxdecode_usc(const uint8_t *map, UNUSED uint64_t *link, UNUSED bool verbose) { - uint8_t zeroes[16] = { 0 }; + enum agx_usc_control type = map[0]; - if (map[0] == 0x4D && (map[11] & BITFIELD_BIT(5))) { - agx_unpack(agxdecode_dump_stream, map, SET_SHADER_EXTENDED, cmd); - DUMP_UNPACKED(SET_SHADER_EXTENDED, cmd, "Set shader\n"); +#define USC_CASE(name, human) \ + case AGX_USC_CONTROL_##name: { \ + DUMP_CL(USC_##name, map, human); \ + return AGX_USC_##name##_LENGTH; \ + } - if (cmd.preshader_mode == AGX_PRESHADER_MODE_PRESHADER) { - agxdecode_log("Preshader\n"); - agx_disassemble(agxdecode_fetch_gpu_mem(cmd.preshader_code, 2048), - 2048, agxdecode_dump_stream); - agxdecode_log("\n---\n"); - } + switch (type) { + case AGX_USC_CONTROL_NO_PRESHADER: { + DUMP_CL(USC_NO_PRESHADER, map, "No preshader"); + return STATE_DONE; + } + + case AGX_USC_CONTROL_PRESHADER: { + 
agx_unpack(agxdecode_dump_stream, map, USC_PRESHADER, ctrl); + DUMP_UNPACKED(USC_PRESHADER, ctrl, "Preshader\n"); + + agx_disassemble(agxdecode_fetch_gpu_mem(ctrl.code, 2048), + 8192, agxdecode_dump_stream); + + return STATE_DONE; + } + + case AGX_USC_CONTROL_SHADER: { + agx_unpack(agxdecode_dump_stream, map, USC_SHADER, ctrl); + DUMP_UNPACKED(USC_SHADER, ctrl, "Shader\n"); agxdecode_log("\n"); - agx_disassemble(agxdecode_fetch_gpu_mem(cmd.code, 2048), - 2048, agxdecode_dump_stream); + agx_disassemble(agxdecode_fetch_gpu_mem(ctrl.code, 2048), + 8192, agxdecode_dump_stream); agxdecode_log("\n"); - char *name; - asprintf(&name, "file%u.bin", COUNTER++); - FILE *fp = fopen(name, "wb"); - fwrite(agxdecode_fetch_gpu_mem(cmd.code, 2048), 1, 2048, fp); - fclose(fp); - free(name); - agxdecode_log("\n"); + return AGX_USC_SHADER_LENGTH; + } - return AGX_SET_SHADER_EXTENDED_LENGTH; - } else if (map[0] == 0x4D) { - agx_unpack(agxdecode_dump_stream, map, SET_SHADER, cmd); - DUMP_UNPACKED(SET_SHADER, cmd, "Set shader\n"); - fflush(agxdecode_dump_stream); + case AGX_USC_CONTROL_SAMPLER: { + agx_unpack(agxdecode_dump_stream, map, USC_SAMPLER, temp); + DUMP_UNPACKED(USC_SAMPLER, temp, "Sampler state\n"); - if (cmd.preshader_mode == AGX_PRESHADER_MODE_PRESHADER) { - agxdecode_log("Preshader\n"); - agx_disassemble(agxdecode_fetch_gpu_mem(cmd.preshader_code, 2048), - 2048, agxdecode_dump_stream); - agxdecode_log("\n---\n"); + uint8_t *samp = agxdecode_fetch_gpu_mem(temp.buffer, + AGX_SAMPLER_LENGTH * temp.count); + + for (unsigned i = 0; i < temp.count; ++i) { + DUMP_CL(SAMPLER, samp, "Sampler"); + samp += AGX_SAMPLER_LENGTH; } - agxdecode_log("\n"); - agx_disassemble(agxdecode_fetch_gpu_mem(cmd.code, 2048), - 2048, agxdecode_dump_stream); - char *name; - asprintf(&name, "file%u.bin", COUNTER++); - FILE *fp = fopen(name, "wb"); - fwrite(agxdecode_fetch_gpu_mem(cmd.code, 2048), 1, 2048, fp); - fclose(fp); - free(name); - agxdecode_log("\n"); + return AGX_USC_SAMPLER_LENGTH; + } - 
return AGX_SET_SHADER_LENGTH; - } else if (map[0] == 0xDD) { - agx_unpack(agxdecode_dump_stream, map, BIND_TEXTURE, temp); - DUMP_UNPACKED(BIND_TEXTURE, temp, "Bind texture\n"); + case AGX_USC_CONTROL_TEXTURE: { + agx_unpack(agxdecode_dump_stream, map, USC_TEXTURE, temp); + DUMP_UNPACKED(USC_TEXTURE, temp, "Texture state\n"); uint8_t *tex = agxdecode_fetch_gpu_mem(temp.buffer, AGX_TEXTURE_LENGTH * temp.count); @@ -376,29 +375,22 @@ agxdecode_pipeline(const uint8_t *map, uint64_t *link, UNUSED bool verbose) tex += AGX_TEXTURE_LENGTH; } - return AGX_BIND_TEXTURE_LENGTH; - } else if (map[0] == 0x9D) { - agx_unpack(agxdecode_dump_stream, map, BIND_SAMPLER, temp); - DUMP_UNPACKED(BIND_SAMPLER, temp, "Bind sampler\n"); + return AGX_USC_TEXTURE_LENGTH; + } - uint8_t *samp = agxdecode_fetch_gpu_mem(temp.buffer, - AGX_SAMPLER_LENGTH * temp.count); + USC_CASE(FRAGMENT_PROPERTIES, "Fragment properties"); + USC_CASE(UNIFORM, "Uniform"); + USC_CASE(SHARED, "Shared"); + USC_CASE(REGISTERS, "Registers"); - for (unsigned i = 0; i < temp.count; ++i) { - DUMP_CL(SAMPLER, samp, "Sampler"); - samp += AGX_SAMPLER_LENGTH; - } - - return AGX_BIND_SAMPLER_LENGTH; - } else if (map[0] == 0x1D) { - DUMP_CL(BIND_UNIFORM, map, "Bind uniform"); - return AGX_BIND_UNIFORM_LENGTH; - } else if (memcmp(map, zeroes, 16) == 0) { - /* TODO: Termination */ - return STATE_DONE; - } else { - return 0; + default: + fprintf(agxdecode_dump_stream, "Unknown USC control type: %u\n", + type); + hexdump(agxdecode_dump_stream, map, 8, false); + return 8; } + +#undef USC_CASE } #define PPP_PRINT(map, header_name, struct_name, human) \ @@ -438,7 +430,7 @@ agxdecode_record(uint64_t va, size_t size, bool verbose) if (hdr.fragment_shader) { agx_unpack(agxdecode_dump_stream, map, FRAGMENT_SHADER, frag); - agxdecode_stateful(frag.pipeline, "Fragment pipeline", agxdecode_pipeline, verbose); + agxdecode_stateful(frag.pipeline, "Fragment pipeline", agxdecode_usc, verbose); if (frag.cf_bindings) { uint8_t *cf = 
agxdecode_fetch_gpu_mem(frag.cf_bindings, 128); @@ -476,7 +468,7 @@ agxdecode_cdm(const uint8_t *map, uint64_t *link, bool verbose) switch (block_type) { case AGX_CDM_BLOCK_TYPE_COMPUTE_KERNEL: { agx_unpack(agxdecode_dump_stream, map, LAUNCH, cmd); - agxdecode_stateful(cmd.pipeline, "Pipeline", agxdecode_pipeline, verbose); + agxdecode_stateful(cmd.pipeline, "Pipeline", agxdecode_usc, verbose); DUMP_UNPACKED(LAUNCH, cmd, "Launch\n"); return AGX_LAUNCH_LENGTH; } @@ -541,7 +533,7 @@ agxdecode_vdm(const uint8_t *map, uint64_t *link, bool verbose) agx_unpack(agxdecode_dump_stream, map, VDM_STATE_VERTEX_SHADER_WORD_1, word_1); fprintf(agxdecode_dump_stream, "Pipeline %X\n", (uint32_t) word_1.pipeline); - agxdecode_stateful(word_1.pipeline, "Pipeline", agxdecode_pipeline, verbose); + agxdecode_stateful(word_1.pipeline, "Pipeline", agxdecode_usc, verbose); } VDM_PRINT(vertex_shader_word_1, VERTEX_SHADER_WORD_1, "Vertex shader word 1"); @@ -615,24 +607,24 @@ agxdecode_gfx(uint32_t *cmdbuf, uint64_t encoder, bool verbose) if (gfx.clear_pipeline_unk) { fprintf(agxdecode_dump_stream, "Unk: %X\n", gfx.clear_pipeline_unk); agxdecode_stateful(gfx.clear_pipeline, "Clear pipeline", - agxdecode_pipeline, verbose); + agxdecode_usc, verbose); } if (gfx.store_pipeline_unk) { assert(gfx.store_pipeline_unk == 0x4); agxdecode_stateful(gfx.store_pipeline, "Store pipeline", - agxdecode_pipeline, verbose); + agxdecode_usc, verbose); } assert((gfx.partial_reload_pipeline_unk & 0xF) == 0x4); if (gfx.partial_reload_pipeline) { agxdecode_stateful(gfx.partial_reload_pipeline, - "Partial reload pipeline", agxdecode_pipeline, verbose); + "Partial reload pipeline", agxdecode_usc, verbose); } if (gfx.partial_store_pipeline) { agxdecode_stateful(gfx.partial_store_pipeline, - "Partial store pipeline", agxdecode_pipeline, verbose); + "Partial store pipeline", agxdecode_usc, verbose); } } diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c index 
23be68e6e827..7a324d87faf1 100644 --- a/src/gallium/drivers/asahi/agx_state.c +++ b/src/gallium/drivers/asahi/agx_state.c @@ -1193,20 +1193,68 @@ agx_delete_shader_state(struct pipe_context *ctx, free(so); } -/* Pipeline consists of a sequence of binding commands followed by a set shader command */ -static uint32_t -agx_build_pipeline(struct agx_context *ctx, struct agx_compiled_shader *cs, enum pipe_shader_type stage) +struct agx_usc_builder { + struct agx_ptr T; + uint8_t *head; + +#ifndef NDEBUG + size_t size; +#endif +}; + +static struct agx_usc_builder +agx_alloc_usc_control(struct agx_pool *pool, + unsigned num_reg_bindings) +{ + STATIC_ASSERT(AGX_USC_TEXTURE_LENGTH == AGX_USC_UNIFORM_LENGTH); + STATIC_ASSERT(AGX_USC_SAMPLER_LENGTH == AGX_USC_UNIFORM_LENGTH); + + size_t size = AGX_USC_UNIFORM_LENGTH * num_reg_bindings; + + size += AGX_USC_SHARED_LENGTH; + size += AGX_USC_SHADER_LENGTH; + size += AGX_USC_REGISTERS_LENGTH; + size += MAX2(AGX_USC_NO_PRESHADER_LENGTH, AGX_USC_PRESHADER_LENGTH); + size += AGX_USC_FRAGMENT_PROPERTIES_LENGTH; + + struct agx_usc_builder b = { + .T = agx_pool_alloc_aligned(pool, size, 64), + +#ifndef NDEBUG + .size = size, +#endif + }; + + b.head = (uint8_t *) b.T.cpu; + + return b; +} + +static bool +agx_usc_builder_validate(struct agx_usc_builder *b, size_t size) { - /* Pipelines must be 64-byte aligned */ - struct agx_ptr ptr = agx_pool_alloc_aligned(&ctx->batch->pipeline_pool, - (cs->info.push_ranges * AGX_BIND_UNIFORM_LENGTH) + - AGX_BIND_TEXTURE_LENGTH + - AGX_BIND_SAMPLER_LENGTH + - AGX_SET_SHADER_EXTENDED_LENGTH + 8, - 64); +#ifndef NDEBUG + assert(((b->head - (uint8_t *) b->T.cpu) + size) <= b->size); +#endif - uint8_t *record = ptr.cpu; + return true; +} +#define agx_usc_pack(b, struct_name, template) \ + for (bool it = agx_usc_builder_validate((b), AGX_USC_##struct_name##_LENGTH); \ + it; it = false, (b)->head += AGX_USC_##struct_name##_LENGTH) \ + agx_pack((b)->head, USC_##struct_name, template) + +static uint32_t 
+agx_usc_fini(struct agx_usc_builder *b) +{ + assert(b->T.gpu < (1ull << 32) && "pipelines must be in low memory"); /* strict bound: the address is narrowed to uint32_t on return */ + return b->T.gpu; +} + +static uint32_t +agx_build_pipeline(struct agx_context *ctx, struct agx_compiled_shader *cs, enum pipe_shader_type stage) +{ unsigned nr_textures = ctx->stage[stage].texture_count; unsigned nr_samplers = ctx->stage[stage].sampler_count; @@ -1235,25 +1283,26 @@ agx_build_pipeline(struct agx_context *ctx, struct agx_compiled_shader *cs, enum samplers[i] = sampler->desc; } + struct agx_usc_builder b = + agx_alloc_usc_control(&ctx->batch->pipeline_pool, + cs->info.push_ranges + 2); + if (nr_textures) { - agx_pack(record, BIND_TEXTURE, cfg) { + agx_usc_pack(&b, TEXTURE, cfg) { cfg.start = 0; cfg.count = nr_textures; cfg.buffer = T_tex.gpu; } ctx->batch->textures = T_tex.gpu; - record += AGX_BIND_TEXTURE_LENGTH; } if (nr_samplers) { - agx_pack(record, BIND_SAMPLER, cfg) { + agx_usc_pack(&b, SAMPLER, cfg) { cfg.start = 0; cfg.count = nr_samplers; cfg.buffer = T_samp.gpu; } - - record += AGX_BIND_SAMPLER_LENGTH; } /* Must only upload uniforms after uploading textures so we can implement the @@ -1262,97 +1311,81 @@ agx_build_pipeline(struct agx_context *ctx, struct agx_compiled_shader *cs, enum for (unsigned i = 0; i < cs->info.push_ranges; ++i) { struct agx_push push = cs->info.push[i]; - agx_pack(record, BIND_UNIFORM, cfg) { + agx_usc_pack(&b, UNIFORM, cfg) { cfg.start_halfs = push.base; cfg.size_halfs = push.length; cfg.buffer = agx_push_location(ctx, push, stage); } - - record += AGX_BIND_UNIFORM_LENGTH; } - /* TODO: Can we prepack this? 
*/ - if (stage == PIPE_SHADER_FRAGMENT) { - bool writes_sample_mask = ctx->fs->info.writes_sample_mask; - - agx_pack(record, SET_SHADER_EXTENDED, cfg) { - cfg.code = cs->bo->ptr.gpu; - cfg.register_quadwords = 0; - cfg.unk_3 = 0x8d; - cfg.unk_1 = 0x10bd; + agx_usc_pack(&b, SHARED, cfg) { + if (stage == PIPE_SHADER_FRAGMENT) { + cfg.uses_shared_memory = true; + cfg.unk_1 = 0x10bc; cfg.shared_memory_per_threadgroup_in_256_bytes = 32; - cfg.unk_2 = 0x0d; - cfg.loads_varyings = true; - cfg.fragment_parameters.early_z_testing = !writes_sample_mask; - cfg.unk_4 = 0x800; - cfg.preshader_unk = 0xc080; - cfg.spill_size = 0x2; + } else { + cfg.unk_1 = 0x90; } + } - record += AGX_SET_SHADER_EXTENDED_LENGTH; - } else { - agx_pack(record, SET_SHADER, cfg) { - cfg.code = cs->bo->ptr.gpu; - cfg.register_quadwords = 0; - cfg.unk_2b = cs->info.varyings.vs.nr_index; - cfg.unk_2 = 0x0d; - } + agx_usc_pack(&b, SHADER, cfg) { + cfg.loads_varyings = (stage == PIPE_SHADER_FRAGMENT); + cfg.code = cs->bo->ptr.gpu; + cfg.unk_2 = (stage == PIPE_SHADER_FRAGMENT) ? 2 : 3; + } - record += AGX_SET_SHADER_LENGTH; + agx_usc_pack(&b, REGISTERS, cfg) { + cfg.register_quadwords = 0; + cfg.unk_1 = (stage == PIPE_SHADER_FRAGMENT); } - /* End pipeline */ - memset(record, 0, 8); - assert(ptr.gpu < (1ull << 32)); - return ptr.gpu; + if (stage == PIPE_SHADER_FRAGMENT) { + agx_usc_pack(&b, FRAGMENT_PROPERTIES, cfg) { + bool writes_sample_mask = ctx->fs->info.writes_sample_mask; + cfg.early_z_testing = !writes_sample_mask; + cfg.unk_4 = 0x2; + cfg.unk_5 = 0x0; + } + } + + agx_usc_pack(&b, NO_PRESHADER, cfg); + + return agx_usc_fini(&b); } /* Internal pipelines (TODO: refactor?) 
*/ uint64_t agx_build_clear_pipeline(struct agx_context *ctx, uint32_t code, uint64_t clear_buf) { - struct agx_ptr ptr = agx_pool_alloc_aligned(&ctx->batch->pipeline_pool, - (1 * AGX_BIND_UNIFORM_LENGTH) + - AGX_SET_SHADER_EXTENDED_LENGTH + 8, - 64); + struct agx_usc_builder b = + agx_alloc_usc_control(&ctx->batch->pipeline_pool, 1); - uint8_t *record = ptr.cpu; - - agx_pack(record, BIND_UNIFORM, cfg) { + agx_usc_pack(&b, UNIFORM, cfg) { cfg.start_halfs = (6 * 2); cfg.size_halfs = 4; cfg.buffer = clear_buf; } - record += AGX_BIND_UNIFORM_LENGTH; + agx_usc_pack(&b, SHARED, cfg) { + cfg.uses_shared_memory = true; + cfg.unk_1 = 0x10bc; + cfg.shared_memory_per_threadgroup_in_256_bytes = 32; + } - /* TODO: Can we prepack this? */ - agx_pack(record, SET_SHADER, cfg) { + agx_usc_pack(&b, SHADER, cfg) { cfg.code = code; - cfg.unk_1 = 0x10bd; - cfg.shared_memory_per_threadgroup_in_256_bytes = 32; - cfg.unk_2 = 0x0d; - cfg.unk_3 = 0x8d; - cfg.register_quadwords = 1; + cfg.unk_2 = 3; } - record += AGX_SET_SHADER_LENGTH; + agx_usc_pack(&b, REGISTERS, cfg) cfg.register_quadwords = 1; + agx_usc_pack(&b, NO_PRESHADER, cfg); - /* End pipeline */ - memset(record, 0, 8); - return ptr.gpu; + return agx_usc_fini(&b); } uint64_t agx_build_reload_pipeline(struct agx_context *ctx, uint32_t code, struct pipe_surface *surf) { - struct agx_ptr ptr = agx_pool_alloc_aligned(&ctx->batch->pipeline_pool, - (1 * AGX_BIND_TEXTURE_LENGTH) + - (1 * AGX_BIND_SAMPLER_LENGTH) + - AGX_SET_SHADER_EXTENDED_LENGTH + 8, - 64); - - uint8_t *record = ptr.cpu; struct agx_ptr sampler = agx_pool_alloc_aligned(&ctx->batch->pool, AGX_SAMPLER_LENGTH, 64); struct agx_ptr texture = agx_pool_alloc_aligned(&ctx->batch->pool, AGX_TEXTURE_LENGTH, 64); @@ -1398,91 +1431,70 @@ agx_build_reload_pipeline(struct agx_context *ctx, uint32_t code, struct pipe_su cfg.unk_tiled = true; } - agx_pack(record, BIND_TEXTURE, cfg) { + struct agx_usc_builder b = + agx_alloc_usc_control(&ctx->batch->pipeline_pool, 2); + + 
agx_usc_pack(&b, TEXTURE, cfg) { cfg.start = 0; cfg.count = 1; cfg.buffer = texture.gpu; } - record += AGX_BIND_TEXTURE_LENGTH; - - agx_pack(record, BIND_SAMPLER, cfg) { + agx_usc_pack(&b, SAMPLER, cfg) { cfg.start = 0; cfg.count = 1; cfg.buffer = sampler.gpu; } - record += AGX_BIND_SAMPLER_LENGTH; + agx_usc_pack(&b, SHARED, cfg) { + cfg.uses_shared_memory = true; + cfg.unk_1 = 0x10bc; + cfg.shared_memory_per_threadgroup_in_256_bytes = 32; + } - /* TODO: Can we prepack this? */ - agx_pack(record, SET_SHADER_EXTENDED, cfg) { + agx_usc_pack(&b, SHADER, cfg) { cfg.code = code; - cfg.register_quadwords = 0; - cfg.unk_3 = 0x8d; - cfg.unk_2 = 0x0d; - cfg.unk_4 = 0; - cfg.fragment_parameters.unk_1 = 0x880100; - cfg.fragment_parameters.early_z_testing = false; - cfg.fragment_parameters.unk_2 = false; - cfg.fragment_parameters.unk_3 = 0; - cfg.preshader_mode = 0; // XXX + cfg.unk_2 = 3; } - record += AGX_SET_SHADER_EXTENDED_LENGTH; + agx_usc_pack(&b, REGISTERS, cfg) cfg.register_quadwords = 0; + agx_usc_pack(&b, NO_PRESHADER, cfg); - /* End pipeline */ - memset(record, 0, 8); - return ptr.gpu; + return agx_usc_fini(&b); } uint64_t agx_build_store_pipeline(struct agx_context *ctx, uint32_t code, uint64_t render_target) { - struct agx_ptr ptr = agx_pool_alloc_aligned(&ctx->batch->pipeline_pool, - (1 * AGX_BIND_TEXTURE_LENGTH) + - (1 * AGX_BIND_UNIFORM_LENGTH) + - AGX_SET_SHADER_EXTENDED_LENGTH + 8, - 64); + struct agx_usc_builder b = + agx_alloc_usc_control(&ctx->batch->pipeline_pool, 2); - uint8_t *record = ptr.cpu; - - agx_pack(record, BIND_TEXTURE, cfg) { + agx_usc_pack(&b, TEXTURE, cfg) { cfg.start = 0; cfg.count = 1; cfg.buffer = render_target; } - record += AGX_BIND_TEXTURE_LENGTH; - uint32_t unk[] = { 0, ~0 }; - agx_pack(record, BIND_UNIFORM, cfg) { + agx_usc_pack(&b, UNIFORM, cfg) { cfg.start_halfs = 4; cfg.size_halfs = 4; cfg.buffer = agx_pool_upload_aligned(&ctx->batch->pool, unk, sizeof(unk), 16); } - record += AGX_BIND_UNIFORM_LENGTH; - - /* TODO: Can we prepack 
this? */ - agx_pack(record, SET_SHADER_EXTENDED, cfg) { - cfg.code = code; - cfg.register_quadwords = 1; - cfg.unk_2 = 0xd; - cfg.unk_3 = 0x8d; - cfg.fragment_parameters.unk_1 = 0x880100; - cfg.fragment_parameters.early_z_testing = false; - cfg.fragment_parameters.unk_2 = false; - cfg.fragment_parameters.unk_3 = 0; - cfg.preshader_mode = 0; // XXX + agx_usc_pack(&b, SHARED, cfg) { + cfg.uses_shared_memory = true; + cfg.unk_1 = 0x10bc; + cfg.shared_memory_per_threadgroup_in_256_bytes = 32; } - record += AGX_SET_SHADER_EXTENDED_LENGTH; + agx_usc_pack(&b, SHADER, cfg) cfg.code = code; + agx_usc_pack(&b, REGISTERS, cfg) cfg.register_quadwords = 1; + agx_usc_pack(&b, NO_PRESHADER, cfg); - /* End pipeline */ - memset(record, 0, 8); - return ptr.gpu; + return agx_usc_fini(&b); } void -- GitLab From b8b3c9fa2afbe5d410c06b16591a369ff3a04137 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Sat, 17 Sep 2022 16:53:16 -0400 Subject: [PATCH 16/17] asahi: Identify pixel stride Number of bytes in a pixel in the tilebuffer, does not depend on the tile size. 
Signed-off-by: Alyssa Rosenzweig Part-of: --- src/asahi/lib/cmdbuf.xml | 3 ++- src/gallium/drivers/asahi/agx_state.c | 12 ++++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/asahi/lib/cmdbuf.xml b/src/asahi/lib/cmdbuf.xml index c9b2564cdc45..1791f0740524 100644 --- a/src/asahi/lib/cmdbuf.xml +++ b/src/asahi/lib/cmdbuf.xml @@ -510,7 +510,8 @@ - + + diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c index 7a324d87faf1..790098f9f919 100644 --- a/src/gallium/drivers/asahi/agx_state.c +++ b/src/gallium/drivers/asahi/agx_state.c @@ -1321,7 +1321,8 @@ agx_build_pipeline(struct agx_context *ctx, struct agx_compiled_shader *cs, enum agx_usc_pack(&b, SHARED, cfg) { if (stage == PIPE_SHADER_FRAGMENT) { cfg.uses_shared_memory = true; - cfg.unk_1 = 0x10bc; + cfg.unk_1 = 0xbc; + cfg.pixel_stride_in_8_bytes = 1; cfg.shared_memory_per_threadgroup_in_256_bytes = 32; } else { cfg.unk_1 = 0x90; @@ -1368,7 +1369,8 @@ agx_build_clear_pipeline(struct agx_context *ctx, uint32_t code, uint64_t clear_ agx_usc_pack(&b, SHARED, cfg) { cfg.uses_shared_memory = true; - cfg.unk_1 = 0x10bc; + cfg.unk_1 = 0xbc; + cfg.pixel_stride_in_8_bytes = 1; cfg.shared_memory_per_threadgroup_in_256_bytes = 32; } @@ -1448,7 +1450,8 @@ agx_build_reload_pipeline(struct agx_context *ctx, uint32_t code, struct pipe_su agx_usc_pack(&b, SHARED, cfg) { cfg.uses_shared_memory = true; - cfg.unk_1 = 0x10bc; + cfg.unk_1 = 0xbc; + cfg.pixel_stride_in_8_bytes = 1; cfg.shared_memory_per_threadgroup_in_256_bytes = 32; } @@ -1486,7 +1489,8 @@ agx_build_store_pipeline(struct agx_context *ctx, uint32_t code, agx_usc_pack(&b, SHARED, cfg) { cfg.uses_shared_memory = true; - cfg.unk_1 = 0x10bc; + cfg.unk_1 = 0xbc; + cfg.pixel_stride_in_8_bytes = 1; cfg.shared_memory_per_threadgroup_in_256_bytes = 32; } -- GitLab From bcd75a13e091e3a9a09cef63a6b9287dd73ca83f Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Sat, 17 Sep 2022 17:14:17 -0400 Subject: [PATCH 
17/17] asahi: Identify shared memory layouts Somehow maps to the tile size. Not sure about the details yet. Signed-off-by: Alyssa Rosenzweig Part-of: --- src/asahi/lib/cmdbuf.xml | 9 ++++++++- src/gallium/drivers/asahi/agx_state.c | 10 +++++----- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/asahi/lib/cmdbuf.xml b/src/asahi/lib/cmdbuf.xml index 1791f0740524..e3fcfd1773f8 100644 --- a/src/asahi/lib/cmdbuf.xml +++ b/src/asahi/lib/cmdbuf.xml @@ -507,10 +507,17 @@ + + + + + + + - + diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c index 790098f9f919..61a94dcb0424 100644 --- a/src/gallium/drivers/asahi/agx_state.c +++ b/src/gallium/drivers/asahi/agx_state.c @@ -1321,11 +1321,11 @@ agx_build_pipeline(struct agx_context *ctx, struct agx_compiled_shader *cs, enum agx_usc_pack(&b, SHARED, cfg) { if (stage == PIPE_SHADER_FRAGMENT) { cfg.uses_shared_memory = true; - cfg.unk_1 = 0xbc; + cfg.shared_layout = AGX_SHARED_LAYOUT_32X32; cfg.pixel_stride_in_8_bytes = 1; cfg.shared_memory_per_threadgroup_in_256_bytes = 32; } else { - cfg.unk_1 = 0x90; + cfg.shared_layout = AGX_SHARED_LAYOUT_VERTEX_COMPUTE; } } @@ -1369,7 +1369,7 @@ agx_build_clear_pipeline(struct agx_context *ctx, uint32_t code, uint64_t clear_ agx_usc_pack(&b, SHARED, cfg) { cfg.uses_shared_memory = true; - cfg.unk_1 = 0xbc; + cfg.shared_layout = AGX_SHARED_LAYOUT_32X32; cfg.pixel_stride_in_8_bytes = 1; cfg.shared_memory_per_threadgroup_in_256_bytes = 32; } @@ -1450,7 +1450,7 @@ agx_build_reload_pipeline(struct agx_context *ctx, uint32_t code, struct pipe_su agx_usc_pack(&b, SHARED, cfg) { cfg.uses_shared_memory = true; - cfg.unk_1 = 0xbc; + cfg.shared_layout = AGX_SHARED_LAYOUT_32X32; cfg.pixel_stride_in_8_bytes = 1; cfg.shared_memory_per_threadgroup_in_256_bytes = 32; } @@ -1489,7 +1489,7 @@ agx_build_store_pipeline(struct agx_context *ctx, uint32_t code, agx_usc_pack(&b, SHARED, cfg) { cfg.uses_shared_memory = true; - cfg.unk_1 = 0xbc; + 
cfg.shared_layout = AGX_SHARED_LAYOUT_32X32; cfg.pixel_stride_in_8_bytes = 1; cfg.shared_memory_per_threadgroup_in_256_bytes = 32; } -- GitLab