diff --git a/src/asahi/lib/cmdbuf.xml b/src/asahi/lib/cmdbuf.xml index 045d2877089abbca6ebcb372a1d7fa03a3ef298f..e3fcfd1773f82474938624e62669e8457d93778f 100644 --- a/src/asahi/lib/cmdbuf.xml +++ b/src/asahi/lib/cmdbuf.xml @@ -469,9 +469,21 @@ - - - + + + + + + + + + + + + + + + @@ -479,80 +491,76 @@ - - + + - - + + - - - + + + + + - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + @@ -661,23 +669,30 @@ - + - + + + + + + - + + + @@ -685,7 +700,17 @@ - + + + + + + + + + + + @@ -698,7 +723,39 @@ - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -709,119 +766,139 @@ - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + - - - - + + + + - - - + + + - - - - - - - - - - - - - + + + + + + + + + + + + + + diff --git a/src/asahi/lib/decode.c b/src/asahi/lib/decode.c index ce84ea0d6e3f8440c8fd1b1832dc05b7a3b699e4..242a6ea68d993a0adc7032df3ab9346fa00acf73 100644 --- a/src/asahi/lib/decode.c +++ b/src/asahi/lib/decode.c @@ -305,64 +305,63 @@ agxdecode_stateful(uint64_t va, const char *label, decode_cmd decoder, bool verb } } -unsigned COUNTER = 0; static unsigned -agxdecode_pipeline(const uint8_t *map, uint64_t *link, UNUSED bool verbose) +agxdecode_usc(const uint8_t *map, UNUSED uint64_t *link, UNUSED bool verbose) { - uint8_t zeroes[16] = { 0 }; + enum agx_usc_control type = map[0]; - if (map[0] == 0x4D && (map[11] & BITFIELD_BIT(5))) { - agx_unpack(agxdecode_dump_stream, map, SET_SHADER_EXTENDED, cmd); - DUMP_UNPACKED(SET_SHADER_EXTENDED, cmd, "Set shader\n"); +#define USC_CASE(name, human) \ + case AGX_USC_CONTROL_##name: { \ + DUMP_CL(USC_##name, map, human); \ + return AGX_USC_##name##_LENGTH; \ + } - if (cmd.preshader_mode == AGX_PRESHADER_MODE_PRESHADER) { - agxdecode_log("Preshader\n"); - agx_disassemble(agxdecode_fetch_gpu_mem(cmd.preshader_code, 2048), - 2048, agxdecode_dump_stream); - agxdecode_log("\n---\n"); - } + switch (type) { + case AGX_USC_CONTROL_NO_PRESHADER: { + DUMP_CL(USC_NO_PRESHADER, map, "No preshader"); + return STATE_DONE; + } + + case AGX_USC_CONTROL_PRESHADER: { + agx_unpack(agxdecode_dump_stream, map, USC_PRESHADER, ctrl); + DUMP_UNPACKED(USC_PRESHADER, ctrl, "Preshader\n"); + + agx_disassemble(agxdecode_fetch_gpu_mem(ctrl.code, 2048), + 8192, agxdecode_dump_stream); + + return STATE_DONE; + } + + case AGX_USC_CONTROL_SHADER: { + agx_unpack(agxdecode_dump_stream, map, USC_SHADER, ctrl); + DUMP_UNPACKED(USC_SHADER, ctrl, "Shader\n"); agxdecode_log("\n"); - agx_disassemble(agxdecode_fetch_gpu_mem(cmd.code, 2048), - 2048, agxdecode_dump_stream); + agx_disassemble(agxdecode_fetch_gpu_mem(ctrl.code, 2048), + 8192, agxdecode_dump_stream); agxdecode_log("\n"); - char *name; - asprintf(&name, "file%u.bin", COUNTER++); - FILE *fp = fopen(name, "wb"); - fwrite(agxdecode_fetch_gpu_mem(cmd.code, 2048), 1, 2048, fp); - fclose(fp); - free(name); - agxdecode_log("\n"); + return AGX_USC_SHADER_LENGTH; + } - return AGX_SET_SHADER_EXTENDED_LENGTH; - } else if (map[0] == 0x4D) { - agx_unpack(agxdecode_dump_stream, map, SET_SHADER, cmd); - DUMP_UNPACKED(SET_SHADER, cmd, "Set shader\n"); - fflush(agxdecode_dump_stream); + case AGX_USC_CONTROL_SAMPLER: { + agx_unpack(agxdecode_dump_stream, map, USC_SAMPLER, temp); + DUMP_UNPACKED(USC_SAMPLER, temp, "Sampler state\n"); + + uint8_t *samp = agxdecode_fetch_gpu_mem(temp.buffer, + AGX_SAMPLER_LENGTH * temp.count); - if (cmd.preshader_mode == AGX_PRESHADER_MODE_PRESHADER) { - agxdecode_log("Preshader\n"); - agx_disassemble(agxdecode_fetch_gpu_mem(cmd.preshader_code, 2048), - 2048, agxdecode_dump_stream); - agxdecode_log("\n---\n"); + for (unsigned i = 0; i < temp.count; ++i) { + DUMP_CL(SAMPLER, samp, "Sampler"); + samp += AGX_SAMPLER_LENGTH; } - agxdecode_log("\n"); - agx_disassemble(agxdecode_fetch_gpu_mem(cmd.code, 2048), - 2048, agxdecode_dump_stream); - char *name; - asprintf(&name, "file%u.bin", COUNTER++); - FILE *fp = fopen(name, "wb"); - fwrite(agxdecode_fetch_gpu_mem(cmd.code, 2048), 1, 2048, fp); - fclose(fp); - free(name); - agxdecode_log("\n"); + return AGX_USC_SAMPLER_LENGTH; + } - return AGX_SET_SHADER_LENGTH; - } else if (map[0] == 0xDD) { - agx_unpack(agxdecode_dump_stream, map, BIND_TEXTURE, temp); - DUMP_UNPACKED(BIND_TEXTURE, temp, "Bind texture\n"); + case AGX_USC_CONTROL_TEXTURE: { + agx_unpack(agxdecode_dump_stream, map, USC_TEXTURE, temp); + DUMP_UNPACKED(USC_TEXTURE, temp, "Texture state\n"); uint8_t *tex = agxdecode_fetch_gpu_mem(temp.buffer, AGX_TEXTURE_LENGTH * temp.count); @@ -376,29 +375,22 @@ agxdecode_pipeline(const uint8_t *map, uint64_t *link, UNUSED bool verbose) tex += AGX_TEXTURE_LENGTH; } - return AGX_BIND_TEXTURE_LENGTH; - } else if (map[0] == 0x9D) { - agx_unpack(agxdecode_dump_stream, map, BIND_SAMPLER, temp); - DUMP_UNPACKED(BIND_SAMPLER, temp, "Bind sampler\n"); + return AGX_USC_TEXTURE_LENGTH; + } - uint8_t *samp = agxdecode_fetch_gpu_mem(temp.buffer, - AGX_SAMPLER_LENGTH * temp.count); + USC_CASE(FRAGMENT_PROPERTIES, "Fragment properties"); + USC_CASE(UNIFORM, "Uniform"); + USC_CASE(SHARED, "Shared"); + USC_CASE(REGISTERS, "Registers"); - for (unsigned i = 0; i < temp.count; ++i) { - DUMP_CL(SAMPLER, samp, "Sampler"); - samp += AGX_SAMPLER_LENGTH; - } - - return AGX_BIND_SAMPLER_LENGTH; - } else if (map[0] == 0x1D) { - DUMP_CL(BIND_UNIFORM, map, "Bind uniform"); - return AGX_BIND_UNIFORM_LENGTH; - } else if (memcmp(map, zeroes, 16) == 0) { - /* TODO: Termination */ - return STATE_DONE; - } else { - return 0; + default: + fprintf(agxdecode_dump_stream, "Unknown USC control type: %u\n", + type); + hexdump(agxdecode_dump_stream, map, 8, false); + return 8; } + +#undef USC_CASE } #define PPP_PRINT(map, header_name, struct_name, human) \ @@ -438,7 +430,7 @@ agxdecode_record(uint64_t va, size_t size, bool verbose) if (hdr.fragment_shader) { agx_unpack(agxdecode_dump_stream, map, FRAGMENT_SHADER, frag); - agxdecode_stateful(frag.pipeline, "Fragment pipeline", agxdecode_pipeline, verbose); + agxdecode_stateful(frag.pipeline, "Fragment pipeline", agxdecode_usc, verbose); if (frag.cf_bindings) { uint8_t *cf = agxdecode_fetch_gpu_mem(frag.cf_bindings, 128); @@ -468,16 +460,42 @@ agxdecode_record(uint64_t va, size_t size, bool verbose) } static unsigned -agxdecode_cmd(const uint8_t *map, uint64_t *link, bool verbose) +agxdecode_cdm(const uint8_t *map, uint64_t *link, bool verbose) { - if (map[0] == 0x02 && map[1] == 0x10 && map[2] == 0x00 && map[3] == 0x00) { - /* XXX: This is a CDM command not a VDM one */ + /* Bits 29-31 contain the block type */ + enum agx_cdm_block_type block_type = (map[3] >> 5); + + switch (block_type) { + case AGX_CDM_BLOCK_TYPE_COMPUTE_KERNEL: { agx_unpack(agxdecode_dump_stream, map, LAUNCH, cmd); - agxdecode_stateful(cmd.pipeline, "Pipeline", agxdecode_pipeline, verbose); + agxdecode_stateful(cmd.pipeline, "Pipeline", agxdecode_usc, verbose); DUMP_UNPACKED(LAUNCH, cmd, "Launch\n"); return AGX_LAUNCH_LENGTH; } + case AGX_CDM_BLOCK_TYPE_STREAM_LINK: { + agx_unpack(agxdecode_dump_stream, map, CDM_STREAM_LINK, hdr); + DUMP_UNPACKED(CDM_STREAM_LINK, hdr, "Stream Link\n"); + *link = hdr.target_lo | (((uint64_t) hdr.target_hi) << 32); + return STATE_LINK; + } + + case AGX_CDM_BLOCK_TYPE_STREAM_TERMINATE: { + DUMP_CL(CDM_STREAM_TERMINATE, map, "Stream Terminate"); + return STATE_DONE; + } + + default: + fprintf(agxdecode_dump_stream, "Unknown CDM block type: %u\n", + block_type); + hexdump(agxdecode_dump_stream, map, 8, false); + return 8; + } +} + +static unsigned +agxdecode_vdm(const uint8_t *map, uint64_t *link, bool verbose) +{ /* Bits 29-31 contain the block type */ enum agx_vdm_block_type block_type = (map[3] >> 5); @@ -515,7 +533,7 @@ agxdecode_cmd(const uint8_t *map, uint64_t *link, bool verbose) agx_unpack(agxdecode_dump_stream, map, VDM_STATE_VERTEX_SHADER_WORD_1, word_1); fprintf(agxdecode_dump_stream, "Pipeline %X\n", (uint32_t) word_1.pipeline); - agxdecode_stateful(word_1.pipeline, "Pipeline", agxdecode_pipeline, verbose); + agxdecode_stateful(word_1.pipeline, "Pipeline", agxdecode_usc, verbose); } VDM_PRINT(vertex_shader_word_1, VERTEX_SHADER_WORD_1, "Vertex shader word 1"); @@ -550,14 +568,14 @@ agxdecode_cmd(const uint8_t *map, uint64_t *link, bool verbose) } case AGX_VDM_BLOCK_TYPE_STREAM_LINK: { - agx_unpack(agxdecode_dump_stream, map, STREAM_LINK, hdr); - DUMP_UNPACKED(STREAM_LINK, hdr, "Stream Link\n"); + agx_unpack(agxdecode_dump_stream, map, VDM_STREAM_LINK, hdr); + DUMP_UNPACKED(VDM_STREAM_LINK, hdr, "Stream Link\n"); *link = hdr.target_lo | (((uint64_t) hdr.target_hi) << 32); return STATE_LINK; } case AGX_VDM_BLOCK_TYPE_STREAM_TERMINATE: { - DUMP_CL(STREAM_TERMINATE, map, "Stream Terminate"); + DUMP_CL(VDM_STREAM_TERMINATE, map, "Stream Terminate"); return STATE_DONE; } @@ -569,6 +587,47 @@ agxdecode_cmd(const uint8_t *map, uint64_t *link, bool verbose) } } +static void +agxdecode_cs(uint32_t *cmdbuf, uint64_t encoder, bool verbose) +{ + agx_unpack(agxdecode_dump_stream, cmdbuf + 16, IOGPU_COMPUTE, cs); + DUMP_UNPACKED(IOGPU_COMPUTE, cs, "Compute\n"); + + agxdecode_stateful(encoder, "Encoder", agxdecode_cdm, verbose); +} + +static void +agxdecode_gfx(uint32_t *cmdbuf, uint64_t encoder, bool verbose) +{ + agx_unpack(agxdecode_dump_stream, cmdbuf + 16, IOGPU_GRAPHICS, gfx); + DUMP_UNPACKED(IOGPU_GRAPHICS, gfx, "Graphics\n"); + + agxdecode_stateful(encoder, "Encoder", agxdecode_vdm, verbose); + + if (gfx.clear_pipeline_unk) { + fprintf(agxdecode_dump_stream, "Unk: %X\n", gfx.clear_pipeline_unk); + agxdecode_stateful(gfx.clear_pipeline, "Clear pipeline", + agxdecode_usc, verbose); + } + + if (gfx.store_pipeline_unk) { + assert(gfx.store_pipeline_unk == 0x4); + agxdecode_stateful(gfx.store_pipeline, "Store pipeline", + agxdecode_usc, verbose); + } + + assert((gfx.partial_reload_pipeline_unk & 0xF) == 0x4); + if (gfx.partial_reload_pipeline) { + agxdecode_stateful(gfx.partial_reload_pipeline, + "Partial reload pipeline", agxdecode_usc, verbose); + } + + if (gfx.partial_store_pipeline) { + agxdecode_stateful(gfx.partial_store_pipeline, + "Partial store pipeline", agxdecode_usc, verbose); + } +} + void agxdecode_cmdstream(unsigned cmdbuf_handle, unsigned map_handle, bool verbose) { @@ -585,26 +644,6 @@ agxdecode_cmdstream(unsigned cmdbuf_handle, unsigned map_handle, bool verbose) /* Print the IOGPU stuff */ agx_unpack(agxdecode_dump_stream, cmdbuf->ptr.cpu, IOGPU_HEADER, cmd); DUMP_UNPACKED(IOGPU_HEADER, cmd, "IOGPU Header\n"); - agx_unpack(agxdecode_dump_stream, ((uint32_t *) cmdbuf->ptr.cpu) + 160, - IOGPU_INTERNAL_PIPELINES, pip); - - DUMP_CL(IOGPU_INTERNAL_PIPELINES, ((uint32_t *) cmdbuf->ptr.cpu) + 160, "Internal pipelines"); - DUMP_CL(IOGPU_AUX_FRAMEBUFFER, ((uint32_t *) cmdbuf->ptr.cpu) + 228, "Aux Framebuffer"); - - agx_unpack(agxdecode_dump_stream, ((uint32_t *) cmdbuf->ptr.cpu) + 292, - IOGPU_CLEAR_Z_S, clearzs); - DUMP_UNPACKED(IOGPU_CLEAR_Z_S, clearzs, "Clear Z/S"); - - /* Guard against changes */ - uint32_t zeroes[356 - 344] = { 0 }; - assert(memcmp(((uint32_t *) cmdbuf->ptr.cpu) + 344, zeroes, 4 * (356 - 344)) == 0); - - DUMP_CL(IOGPU_MISC, ((uint32_t *) cmdbuf->ptr.cpu) + 356, "Misc"); - - /* Should be unused, we think */ - for (unsigned i = (0x6B0 / 4); i < (cmd.attachment_offset / 4); ++i) { - assert(((uint32_t *) cmdbuf->ptr.cpu)[i] == 0); - } DUMP_CL(IOGPU_ATTACHMENT_COUNT, ((uint8_t *) cmdbuf->ptr.cpu + cmd.attachment_offset), "Attachment count"); @@ -616,31 +655,10 @@ agxdecode_cmdstream(unsigned cmdbuf_handle, unsigned map_handle, bool verbose) DUMP_CL(IOGPU_ATTACHMENT, ptr, "Attachment"); } - uint64_t *encoder = ((uint64_t *) cmdbuf->ptr.cpu) + 7; - agxdecode_stateful(*encoder, "Encoder", agxdecode_cmd, verbose); - - if (pip.clear_pipeline_unk) { - fprintf(agxdecode_dump_stream, "Unk: %X\n", pip.clear_pipeline_unk); - agxdecode_stateful(pip.clear_pipeline, "Clear pipeline", - agxdecode_pipeline, verbose); - } - - if (pip.store_pipeline_unk) { - assert(pip.store_pipeline_unk == 0x4); - agxdecode_stateful(pip.store_pipeline, "Store pipeline", - agxdecode_pipeline, verbose); - } - - assert((clearzs.partial_reload_pipeline_unk & 0xF) == 0x4); - if (clearzs.partial_reload_pipeline) { - agxdecode_stateful(clearzs.partial_reload_pipeline, - "Partial reload pipeline", agxdecode_pipeline, verbose); - } - - if (clearzs.partial_store_pipeline) { - agxdecode_stateful(clearzs.partial_store_pipeline, - "Partial store pipeline", agxdecode_pipeline, verbose); - } + if (cmd.unk_5 == 3) + agxdecode_cs((uint32_t *) cmdbuf->ptr.cpu, cmd.encoder, verbose); + else + agxdecode_gfx((uint32_t *) cmdbuf->ptr.cpu, cmd.encoder, verbose); agxdecode_map_read_write(); } diff --git a/src/asahi/lib/gen_pack.py b/src/asahi/lib/gen_pack.py index 430daeba5ba6f7475ffa77af810174b78a024121..a8d839e0e2ce958338b3ff96d97358d7531f9796 100644 --- a/src/asahi/lib/gen_pack.py +++ b/src/asahi/lib/gen_pack.py @@ -26,6 +26,7 @@ import xml.parsers.expat import sys import operator +import math from functools import reduce global_prefix = "agx" @@ -369,7 +370,7 @@ class Group(object): elif field.modifier[0] == "log2": print(" assert(util_is_power_of_two_nonzero(values->{}));".format(field.name)) - for index in range(self.length // 4): + for index in range(math.ceil(self.length / 4)): # Handle MBZ words if not index in words: print(" cl[%2d] = 0;" % index) diff --git a/src/gallium/drivers/asahi/agx_pipe.c b/src/gallium/drivers/asahi/agx_pipe.c index 9b59e235b38621e4dea188940b892f6121c4460b..105a1443424d9a10568fe1f294c659fec7467f92 100644 --- a/src/gallium/drivers/asahi/agx_pipe.c +++ b/src/gallium/drivers/asahi/agx_pipe.c @@ -167,7 +167,10 @@ agx_resource_create(struct pipe_screen *screen, nresource->modifier = agx_select_modifier(nresource); nresource->mipmapped = (templ->last_level > 0); - nresource->internal_format = nresource->base.format; + + assert(templ->format != PIPE_FORMAT_Z24X8_UNORM && + templ->format != PIPE_FORMAT_Z24_UNORM_S8_UINT && + "u_transfer_helper should have lowered"); nresource->layout = (struct ail_layout) { .tiling = (nresource->modifier == DRM_FORMAT_MOD_LINEAR) ? @@ -282,10 +285,10 @@ agx_transfer_map(struct pipe_context *pctx, if (rsrc->modifier == DRM_FORMAT_MOD_APPLE_TWIDDLED) { transfer->base.stride = - util_format_get_stride(resource->format, box->width); + util_format_get_stride(rsrc->layout.format, box->width); transfer->base.layer_stride = - util_format_get_2d_size(resource->format, transfer->base.stride, + util_format_get_2d_size(rsrc->layout.format, transfer->base.stride, box->height); transfer->map = calloc(transfer->base.layer_stride, box->depth); @@ -1139,7 +1142,7 @@ agx_resource_get_stencil(struct pipe_resource *prsrc) static enum pipe_format agx_resource_get_internal_format(struct pipe_resource *prsrc) { - return agx_resource(prsrc)->internal_format; + return agx_resource(prsrc)->layout.format; } static const struct u_transfer_vtbl transfer_vtbl = { diff --git a/src/gallium/drivers/asahi/agx_state.c b/src/gallium/drivers/asahi/agx_state.c index c887c8577aea41e4bfafdbd295e8372212b176a8..61a94dcb0424ba665cbb0edda08a9a89f05b1e4a 100644 --- a/src/gallium/drivers/asahi/agx_state.c +++ b/src/gallium/drivers/asahi/agx_state.c @@ -1193,20 +1193,68 @@ agx_delete_shader_state(struct pipe_context *ctx, free(so); } -/* Pipeline consists of a sequence of binding commands followed by a set shader command */ -static uint32_t -agx_build_pipeline(struct agx_context *ctx, struct agx_compiled_shader *cs, enum pipe_shader_type stage) +struct agx_usc_builder { + struct agx_ptr T; + uint8_t *head; + +#ifndef NDEBUG + size_t size; +#endif +}; + +static struct agx_usc_builder +agx_alloc_usc_control(struct agx_pool *pool, + unsigned num_reg_bindings) { - /* Pipelines must be 64-byte aligned */ - struct agx_ptr ptr = agx_pool_alloc_aligned(&ctx->batch->pipeline_pool, - (cs->info.push_ranges * AGX_BIND_UNIFORM_LENGTH) + - AGX_BIND_TEXTURE_LENGTH + - AGX_BIND_SAMPLER_LENGTH + - AGX_SET_SHADER_EXTENDED_LENGTH + 8, - 64); + STATIC_ASSERT(AGX_USC_TEXTURE_LENGTH == AGX_USC_UNIFORM_LENGTH); + STATIC_ASSERT(AGX_USC_SAMPLER_LENGTH == AGX_USC_UNIFORM_LENGTH); + + size_t size = AGX_USC_UNIFORM_LENGTH * num_reg_bindings; - uint8_t *record = ptr.cpu; + size += AGX_USC_SHARED_LENGTH; + size += AGX_USC_SHADER_LENGTH; + size += AGX_USC_REGISTERS_LENGTH; + size += MAX2(AGX_USC_NO_PRESHADER_LENGTH, AGX_USC_PRESHADER_LENGTH); + size += AGX_USC_FRAGMENT_PROPERTIES_LENGTH; + + struct agx_usc_builder b = { + .T = agx_pool_alloc_aligned(pool, size, 64), + +#ifndef NDEBUG + .size = size, +#endif + }; + + b.head = (uint8_t *) b.T.cpu; + + return b; +} +static bool +agx_usc_builder_validate(struct agx_usc_builder *b, size_t size) +{ +#ifndef NDEBUG + assert(((b->head - (uint8_t *) b->T.cpu) + size) <= b->size); +#endif + + return true; +} + +#define agx_usc_pack(b, struct_name, template) \ + for (bool it = agx_usc_builder_validate((b), AGX_USC_##struct_name##_LENGTH); \ + it; it = false, (b)->head += AGX_USC_##struct_name##_LENGTH) \ + agx_pack((b)->head, USC_##struct_name, template) + +static uint32_t +agx_usc_fini(struct agx_usc_builder *b) +{ + assert(b->T.gpu <= (1ull << 32) && "pipelines must be in low memory"); + return b->T.gpu; +} + +static uint32_t +agx_build_pipeline(struct agx_context *ctx, struct agx_compiled_shader *cs, enum pipe_shader_type stage) +{ unsigned nr_textures = ctx->stage[stage].texture_count; unsigned nr_samplers = ctx->stage[stage].sampler_count; @@ -1235,25 +1283,26 @@ agx_build_pipeline(struct agx_context *ctx, struct agx_compiled_shader *cs, enum samplers[i] = sampler->desc; } + struct agx_usc_builder b = + agx_alloc_usc_control(&ctx->batch->pipeline_pool, + cs->info.push_ranges + 2); + if (nr_textures) { - agx_pack(record, BIND_TEXTURE, cfg) { + agx_usc_pack(&b, TEXTURE, cfg) { cfg.start = 0; cfg.count = nr_textures; cfg.buffer = T_tex.gpu; } ctx->batch->textures = T_tex.gpu; - record += AGX_BIND_TEXTURE_LENGTH; } if (nr_samplers) { - agx_pack(record, BIND_SAMPLER, cfg) { + agx_usc_pack(&b, SAMPLER, cfg) { cfg.start = 0; cfg.count = nr_samplers; cfg.buffer = T_samp.gpu; } - - record += AGX_BIND_SAMPLER_LENGTH; } /* Must only upload uniforms after uploading textures so we can implement the @@ -1262,95 +1311,83 @@ agx_build_pipeline(struct agx_context *ctx, struct agx_compiled_shader *cs, enum for (unsigned i = 0; i < cs->info.push_ranges; ++i) { struct agx_push push = cs->info.push[i]; - agx_pack(record, BIND_UNIFORM, cfg) { + agx_usc_pack(&b, UNIFORM, cfg) { cfg.start_halfs = push.base; cfg.size_halfs = push.length; cfg.buffer = agx_push_location(ctx, push, stage); } - - record += AGX_BIND_UNIFORM_LENGTH; } - /* TODO: Can we prepack this? */ - if (stage == PIPE_SHADER_FRAGMENT) { - bool writes_sample_mask = ctx->fs->info.writes_sample_mask; - - agx_pack(record, SET_SHADER_EXTENDED, cfg) { - cfg.code = cs->bo->ptr.gpu; - cfg.register_quadwords = 0; - cfg.unk_3 = 0x8d; - cfg.unk_1 = 0x2010bd; - cfg.unk_2 = 0x0d; - cfg.loads_varyings = true; - cfg.fragment_parameters.early_z_testing = !writes_sample_mask; - cfg.unk_4 = 0x800; - cfg.preshader_unk = 0xc080; - cfg.spill_size = 0x2; + agx_usc_pack(&b, SHARED, cfg) { + if (stage == PIPE_SHADER_FRAGMENT) { + cfg.uses_shared_memory = true; + cfg.shared_layout = AGX_SHARED_LAYOUT_32X32; + cfg.pixel_stride_in_8_bytes = 1; + cfg.shared_memory_per_threadgroup_in_256_bytes = 32; + } else { + cfg.shared_layout = AGX_SHARED_LAYOUT_VERTEX_COMPUTE; } + } - record += AGX_SET_SHADER_EXTENDED_LENGTH; - } else { - agx_pack(record, SET_SHADER, cfg) { - cfg.code = cs->bo->ptr.gpu; - cfg.register_quadwords = 0; - cfg.unk_2b = cs->info.varyings.vs.nr_index; - cfg.unk_2 = 0x0d; - } + agx_usc_pack(&b, SHADER, cfg) { + cfg.loads_varyings = (stage == PIPE_SHADER_FRAGMENT); + cfg.code = cs->bo->ptr.gpu; + cfg.unk_2 = (stage == PIPE_SHADER_FRAGMENT) ? 2 : 3; + } - record += AGX_SET_SHADER_LENGTH; + agx_usc_pack(&b, REGISTERS, cfg) { + cfg.register_quadwords = 0; + cfg.unk_1 = (stage == PIPE_SHADER_FRAGMENT); } - /* End pipeline */ - memset(record, 0, 8); - assert(ptr.gpu < (1ull << 32)); - return ptr.gpu; + if (stage == PIPE_SHADER_FRAGMENT) { + agx_usc_pack(&b, FRAGMENT_PROPERTIES, cfg) { + bool writes_sample_mask = ctx->fs->info.writes_sample_mask; + cfg.early_z_testing = !writes_sample_mask; + cfg.unk_4 = 0x2; + cfg.unk_5 = 0x0; + } + } + + agx_usc_pack(&b, NO_PRESHADER, cfg); + + return agx_usc_fini(&b); } /* Internal pipelines (TODO: refactor?) */ uint64_t agx_build_clear_pipeline(struct agx_context *ctx, uint32_t code, uint64_t clear_buf) { - struct agx_ptr ptr = agx_pool_alloc_aligned(&ctx->batch->pipeline_pool, - (1 * AGX_BIND_UNIFORM_LENGTH) + - AGX_SET_SHADER_EXTENDED_LENGTH + 8, - 64); - - uint8_t *record = ptr.cpu; + struct agx_usc_builder b = + agx_alloc_usc_control(&ctx->batch->pipeline_pool, 1); - agx_pack(record, BIND_UNIFORM, cfg) { + agx_usc_pack(&b, UNIFORM, cfg) { cfg.start_halfs = (6 * 2); cfg.size_halfs = 4; cfg.buffer = clear_buf; } - record += AGX_BIND_UNIFORM_LENGTH; + agx_usc_pack(&b, SHARED, cfg) { + cfg.uses_shared_memory = true; + cfg.shared_layout = AGX_SHARED_LAYOUT_32X32; + cfg.pixel_stride_in_8_bytes = 1; + cfg.shared_memory_per_threadgroup_in_256_bytes = 32; + } - /* TODO: Can we prepack this? */ - agx_pack(record, SET_SHADER, cfg) { + agx_usc_pack(&b, SHADER, cfg) { cfg.code = code; - cfg.unk_1 = 0x2010bd; - cfg.unk_2 = 0x0d; - cfg.unk_3 = 0x8d; - cfg.register_quadwords = 1; + cfg.unk_2 = 3; } - record += AGX_SET_SHADER_LENGTH; + agx_usc_pack(&b, REGISTERS, cfg) cfg.register_quadwords = 1; + agx_usc_pack(&b, NO_PRESHADER, cfg); - /* End pipeline */ - memset(record, 0, 8); - return ptr.gpu; + return agx_usc_fini(&b); } uint64_t agx_build_reload_pipeline(struct agx_context *ctx, uint32_t code, struct pipe_surface *surf) { - struct agx_ptr ptr = agx_pool_alloc_aligned(&ctx->batch->pipeline_pool, - (1 * AGX_BIND_TEXTURE_LENGTH) + - (1 * AGX_BIND_SAMPLER_LENGTH) + - AGX_SET_SHADER_EXTENDED_LENGTH + 8, - 64); - - uint8_t *record = ptr.cpu; struct agx_ptr sampler = agx_pool_alloc_aligned(&ctx->batch->pool, AGX_SAMPLER_LENGTH, 64); struct agx_ptr texture = agx_pool_alloc_aligned(&ctx->batch->pool, AGX_TEXTURE_LENGTH, 64); @@ -1396,91 +1433,72 @@ agx_build_reload_pipeline(struct agx_context *ctx, uint32_t code, struct pipe_su cfg.unk_tiled = true; } - agx_pack(record, BIND_TEXTURE, cfg) { + struct agx_usc_builder b = + agx_alloc_usc_control(&ctx->batch->pipeline_pool, 2); + + agx_usc_pack(&b, TEXTURE, cfg) { cfg.start = 0; cfg.count = 1; cfg.buffer = texture.gpu; } - record += AGX_BIND_TEXTURE_LENGTH; - - agx_pack(record, BIND_SAMPLER, cfg) { + agx_usc_pack(&b, SAMPLER, cfg) { cfg.start = 0; cfg.count = 1; cfg.buffer = sampler.gpu; } - record += AGX_BIND_SAMPLER_LENGTH; + agx_usc_pack(&b, SHARED, cfg) { + cfg.uses_shared_memory = true; + cfg.shared_layout = AGX_SHARED_LAYOUT_32X32; + cfg.pixel_stride_in_8_bytes = 1; + cfg.shared_memory_per_threadgroup_in_256_bytes = 32; + } - /* TODO: Can we prepack this? */ - agx_pack(record, SET_SHADER_EXTENDED, cfg) { + agx_usc_pack(&b, SHADER, cfg) { cfg.code = code; - cfg.register_quadwords = 0; - cfg.unk_3 = 0x8d; - cfg.unk_2 = 0x0d; - cfg.unk_4 = 0; - cfg.fragment_parameters.unk_1 = 0x880100; - cfg.fragment_parameters.early_z_testing = false; - cfg.fragment_parameters.unk_2 = false; - cfg.fragment_parameters.unk_3 = 0; - cfg.preshader_mode = 0; // XXX + cfg.unk_2 = 3; } - record += AGX_SET_SHADER_EXTENDED_LENGTH; + agx_usc_pack(&b, REGISTERS, cfg) cfg.register_quadwords = 0; + agx_usc_pack(&b, NO_PRESHADER, cfg); - /* End pipeline */ - memset(record, 0, 8); - return ptr.gpu; + return agx_usc_fini(&b); } uint64_t agx_build_store_pipeline(struct agx_context *ctx, uint32_t code, uint64_t render_target) { - struct agx_ptr ptr = agx_pool_alloc_aligned(&ctx->batch->pipeline_pool, - (1 * AGX_BIND_TEXTURE_LENGTH) + - (1 * AGX_BIND_UNIFORM_LENGTH) + - AGX_SET_SHADER_EXTENDED_LENGTH + 8, - 64); + struct agx_usc_builder b = + agx_alloc_usc_control(&ctx->batch->pipeline_pool, 2); - uint8_t *record = ptr.cpu; - - agx_pack(record, BIND_TEXTURE, cfg) { + agx_usc_pack(&b, TEXTURE, cfg) { cfg.start = 0; cfg.count = 1; cfg.buffer = render_target; } - record += AGX_BIND_TEXTURE_LENGTH; - uint32_t unk[] = { 0, ~0 }; - agx_pack(record, BIND_UNIFORM, cfg) { + agx_usc_pack(&b, UNIFORM, cfg) { cfg.start_halfs = 4; cfg.size_halfs = 4; cfg.buffer = agx_pool_upload_aligned(&ctx->batch->pool, unk, sizeof(unk), 16); } - record += AGX_BIND_UNIFORM_LENGTH; - - /* TODO: Can we prepack this? */ - agx_pack(record, SET_SHADER_EXTENDED, cfg) { - cfg.code = code; - cfg.register_quadwords = 1; - cfg.unk_2 = 0xd; - cfg.unk_3 = 0x8d; - cfg.fragment_parameters.unk_1 = 0x880100; - cfg.fragment_parameters.early_z_testing = false; - cfg.fragment_parameters.unk_2 = false; - cfg.fragment_parameters.unk_3 = 0; - cfg.preshader_mode = 0; // XXX + agx_usc_pack(&b, SHARED, cfg) { + cfg.uses_shared_memory = true; + cfg.shared_layout = AGX_SHARED_LAYOUT_32X32; + cfg.pixel_stride_in_8_bytes = 1; + cfg.shared_memory_per_threadgroup_in_256_bytes = 32; } - record += AGX_SET_SHADER_EXTENDED_LENGTH; + agx_usc_pack(&b, SHADER, cfg) cfg.code = code; + agx_usc_pack(&b, REGISTERS, cfg) cfg.register_quadwords = 1; + agx_usc_pack(&b, NO_PRESHADER, cfg); - /* End pipeline */ - memset(record, 0, 8); - return ptr.gpu; + return agx_usc_fini(&b); } void @@ -1792,7 +1810,7 @@ agx_ensure_cmdbuf_has_space(struct agx_batch *batch, size_t space) struct agx_ptr T = agx_pool_alloc_aligned(&batch->pool, size, 256); /* Jump from the old command buffer to the new command buffer */ - agx_pack(batch->encoder_current, STREAM_LINK, cfg) { + agx_pack(batch->encoder_current, VDM_STREAM_LINK, cfg) { cfg.target_lo = T.gpu & BITFIELD_MASK(32); cfg.target_hi = T.gpu >> 32; } diff --git a/src/gallium/drivers/asahi/agx_state.h b/src/gallium/drivers/asahi/agx_state.h index 862f069f0706f35fb4d42d3489b521a34422b0af..868ef1985df67d6228a52479e727e95f25356ada 100644 --- a/src/gallium/drivers/asahi/agx_state.h +++ b/src/gallium/drivers/asahi/agx_state.h @@ -294,9 +294,6 @@ struct agx_resource { * resources. */ struct agx_resource *separate_stencil; - - /* Internal format, since many depth/stencil formats are emulated. */ - enum pipe_format internal_format; }; static inline struct agx_resource * diff --git a/src/gallium/drivers/asahi/magic.c b/src/gallium/drivers/asahi/magic.c index 9683b6bc76a7838ab802e6e0b05fdc04bde9140a..a5dfd62daa33b1734f7b79bcaa98e263782e849e 100644 --- a/src/gallium/drivers/asahi/magic.c +++ b/src/gallium/drivers/asahi/magic.c @@ -109,15 +109,11 @@ agx_map_surface(struct pipe_surface *surf) static void asahi_pack_iogpu_attachment(void *out, struct agx_resource *rsrc, - struct pipe_surface *surf, unsigned total_size) { - /* We don't support layered rendering yet */ - assert(surf->u.tex.first_layer == surf->u.tex.last_layer); - agx_pack(out, IOGPU_ATTACHMENT, cfg) { - cfg.type = asahi_classify_attachment(rsrc->base.format); - cfg.address = agx_map_surface_resource(surf, rsrc); + cfg.type = asahi_classify_attachment(rsrc->layout.format); + cfg.address = rsrc->bo->ptr.gpu; cfg.size = rsrc->layout.size_B; cfg.percent = (100 * cfg.size) / total_size; } @@ -133,7 +129,6 @@ asahi_pack_iogpu_attachments(void *out, struct pipe_framebuffer_state *framebuff for (unsigned i = 0; i < framebuffer->nr_cbufs; ++i) { asahi_pack_iogpu_attachment(attachments + (nr++), agx_resource(framebuffer->cbufs[i]->texture), - framebuffer->cbufs[i], total_attachment_size); } @@ -141,13 +136,11 @@ asahi_pack_iogpu_attachments(void *out, struct pipe_framebuffer_state *framebuff struct agx_resource *rsrc = agx_resource(framebuffer->zsbuf->texture); asahi_pack_iogpu_attachment(attachments + (nr++), - rsrc, framebuffer->zsbuf, - total_attachment_size); + rsrc, total_attachment_size); if (rsrc->separate_stencil) { asahi_pack_iogpu_attachment(attachments + (nr++), rsrc->separate_stencil, - framebuffer->zsbuf, total_attachment_size); } } @@ -186,7 +179,13 @@ demo_cmdbuf(uint64_t *buf, size_t size, uint64_t depth_buffer = 0; uint64_t stencil_buffer = 0; - agx_pack(map + 160, IOGPU_INTERNAL_PIPELINES, cfg) { + agx_pack(map + 16, IOGPU_GRAPHICS, cfg) { + cfg.opengl_depth_clipping = true; + + cfg.deflake_1 = deflake_1; + cfg.deflake_2 = deflake_2; + cfg.deflake_3 = deflake_buffer; + cfg.clear_pipeline_bind = 0xffff8002 | (clear_pipeline_textures ? 0x210 : 0); cfg.clear_pipeline = pipeline_clear; @@ -199,7 +198,11 @@ demo_cmdbuf(uint64_t *buf, size_t size, if (framebuffer->zsbuf) { struct pipe_surface *zsbuf = framebuffer->zsbuf; const struct util_format_description *desc = - util_format_description(zsbuf->texture->format); + util_format_description(agx_resource(zsbuf->texture)->layout.format); + + assert(desc->format == PIPE_FORMAT_Z32_FLOAT || + desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT || + desc->format == PIPE_FORMAT_S8_UINT); cfg.depth_width = framebuffer->width; cfg.depth_height = framebuffer->height; @@ -207,26 +210,22 @@ demo_cmdbuf(uint64_t *buf, size_t size, if (util_format_has_depth(desc)) { depth_buffer = agx_map_surface(zsbuf); - cfg.depth_reload = !should_clear_depth; - cfg.depth_flags |= 0x80000; - if (!should_clear_depth) cfg.depth_flags |= 0x8000; + cfg.zls_control.z_store_enable = true; + cfg.zls_control.z_load_enable = !should_clear_depth; } else { stencil_buffer = agx_map_surface(zsbuf); - cfg.depth_flags |= 0x40000; - if (!should_clear_stencil) cfg.depth_flags |= 0x4000; + cfg.zls_control.s_store_enable = true; + cfg.zls_control.s_load_enable = !should_clear_stencil; } if (agx_resource(zsbuf->texture)->separate_stencil) { stencil_buffer = agx_map_surface_resource(zsbuf, agx_resource(zsbuf->texture)->separate_stencil); - cfg.depth_flags |= 0x40000; - if (!should_clear_stencil) cfg.depth_flags |= 0x4000; + cfg.zls_control.s_store_enable = true; + cfg.zls_control.s_load_enable = !should_clear_stencil; } - cfg.depth_buffer_if_clearing = depth_buffer; - cfg.stencil_buffer = stencil_buffer; - /* It's unclear how tile size is conveyed for depth/stencil targets, * which interactions with mipmapping (for example of a 33x33 * depth/stencil attachment) @@ -234,18 +233,17 @@ demo_cmdbuf(uint64_t *buf, size_t size, if (zsbuf->u.tex.level != 0) unreachable("todo: mapping other levels"); - cfg.depth_buffer = depth_buffer; + cfg.depth_buffer_1 = depth_buffer; + cfg.depth_buffer_2 = depth_buffer; + + cfg.stencil_buffer_1 = stencil_buffer; cfg.stencil_buffer_2 = stencil_buffer; } - } - agx_pack(map + 228, IOGPU_AUX_FRAMEBUFFER, cfg) { - cfg.width = framebuffer->width; - cfg.height = framebuffer->height; + cfg.width_1 = framebuffer->width; + cfg.height_1 = framebuffer->height; cfg.pointer = unk_buffer_2; - } - agx_pack(map + 292, IOGPU_CLEAR_Z_S, cfg) { cfg.set_when_reloading_z_or_s_1 = clear_pipeline_textures; if (depth_buffer && !should_clear_depth) { @@ -266,16 +264,14 @@ demo_cmdbuf(uint64_t *buf, size_t size, cfg.partial_store_pipeline_bind = 0x12; cfg.partial_store_pipeline = pipeline_store; - } - agx_pack(map + 356, IOGPU_MISC, cfg) { - cfg.depth_buffer = depth_buffer; - cfg.stencil_buffer = stencil_buffer; + cfg.depth_buffer_3 = depth_buffer; + cfg.stencil_buffer_3 = stencil_buffer; cfg.encoder_id = encoder_id; cfg.unknown_buffer = demo_unk6(pool); - cfg.width = framebuffer->width; - cfg.height = framebuffer->height; - cfg.unk_80 = clear_pipeline_textures ? 0x0 : 0x1; + cfg.width_2 = framebuffer->width; + cfg.height_2 = framebuffer->height; + cfg.unk_352 = clear_pipeline_textures ? 0x0 : 0x1; } unsigned offset_unk = (484 * 4); @@ -295,11 +291,6 @@ demo_cmdbuf(uint64_t *buf, size_t size, cfg.attachment_length = nr_attachments * AGX_IOGPU_ATTACHMENT_LENGTH; cfg.unknown_offset = offset_unk; cfg.encoder = encoder_ptr; - cfg.opengl_depth_clipping = true; - - cfg.deflake_1 = deflake_1; - cfg.deflake_2 = deflake_2; - cfg.deflake_3 = deflake_buffer; } return total_size;