Commit fec65187 authored by Joshua Ashton 🐸
Browse files

prolog debugging

parent be9023e4
Pipeline #533761 waiting for manual action with stages
......@@ -12015,8 +12015,6 @@ select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shad
const struct radv_shader_info* info,
const struct radv_shader_args* args, unsigned* num_preserved_sgprs)
{
assert(key->num_attributes > 0);
/* This should be enough for any shader/stage. */
unsigned max_user_sgprs = options->chip_class >= GFX9 ? 32 : 16;
*num_preserved_sgprs = max_user_sgprs + 14;
......@@ -12032,7 +12030,7 @@ select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shad
Builder bld(program, block);
block->instructions.reserve(16 + key->num_attributes * 4);
block->instructions.reserve(16 + MAX2(key->num_attributes, 1) * 4);
bld.sopp(aco_opcode::s_setprio, -1u, 0x3u);
......@@ -12053,13 +12051,14 @@ select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shad
PhysReg attributes_start(256 + args->ac.num_vgprs_used);
/* choose vgprs that won't be used for anything else until the last attribute load */
PhysReg vertex_index(attributes_start.reg() + key->num_attributes * 4 - 1);
PhysReg instance_index(attributes_start.reg() + key->num_attributes * 4 - 2);
PhysReg start_instance_vgpr(attributes_start.reg() + key->num_attributes * 4 - 3);
PhysReg nontrivial_tmp_vgpr0(attributes_start.reg() + key->num_attributes * 4 - 4);
PhysReg nontrivial_tmp_vgpr1(attributes_start.reg() + key->num_attributes * 4);
bld.sop1(aco_opcode::s_mov_b32, Definition(vertex_buffers, s1),
PhysReg vertex_index(attributes_start.reg() + MAX2(key->num_attributes, 1) * 4 - 1);
PhysReg instance_index(attributes_start.reg() + MAX2(key->num_attributes, 1) * 4 - 2);
PhysReg start_instance_vgpr(attributes_start.reg() + MAX2(key->num_attributes, 1) * 4 - 3);
PhysReg nontrivial_tmp_vgpr0(attributes_start.reg() + MAX2(key->num_attributes, 1) * 4 - 4);
PhysReg nontrivial_tmp_vgpr1(attributes_start.reg() + MAX2(key->num_attributes, 1) * 4);
if (key->num_attributes)
bld.sop1(aco_opcode::s_mov_b32, Definition(vertex_buffers, s1),
get_arg_fixed(args, args->ac.vertex_buffers));
if (options->address32_hi >= 0xffff8000 || options->address32_hi <= 0x7fff) {
bld.sopk(aco_opcode::s_movk_i32, Definition(vertex_buffers.advance(4), s1),
......@@ -12071,7 +12070,7 @@ select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shad
/* calculate vgpr requirements */
unsigned num_vgprs = attributes_start.reg() - 256;
num_vgprs += key->num_attributes * 4;
num_vgprs += MAX2(key->num_attributes, 1) * 4;
if (has_nontrivial_divisors && program->chip_class <= GFX8)
num_vgprs++; /* make space for nontrivial_tmp_vgpr1 */
unsigned num_sgprs = 0;
......
......@@ -2711,7 +2711,7 @@ radv_instance_rate_prolog_index(unsigned num_attributes, uint32_t instance_rate_
/* From total number of attributes to offset. */
static const uint16_t total_to_offset[16] = {0, 1, 4, 10, 20, 35, 56, 84,
120, 165, 220, 286, 364, 455, 560, 680};
unsigned start_index = total_to_offset[num_attributes - 1];
unsigned start_index = total_to_offset[num_attributes];
/* From number of instanced attributes to offset. This would require a different LUT depending on
* the total number of attributes, but we can exploit a pattern to use just the LUT for 16 total
......@@ -2794,7 +2794,7 @@ lookup_vs_prolog(struct radv_cmd_buffer *cmd_buffer, struct radv_shader *vs_shad
(!vs_shader->info.vs.as_ls || !instance_rate_inputs) &&
!misaligned_mask && !state->alpha_adjust_lo && !state->alpha_adjust_hi) {
if (!instance_rate_inputs) {
prolog = device->simple_vs_prologs[num_attributes - 1];
prolog = device->simple_vs_prologs[num_attributes];
} else if (num_attributes <= 16 && !*nontrivial_divisors &&
util_bitcount(instance_rate_inputs) ==
(util_last_bit(instance_rate_inputs) - ffs(instance_rate_inputs) + 1)) {
......@@ -2994,6 +2994,7 @@ emit_prolog_inputs(struct radv_cmd_buffer *cmd_buffer, struct radv_shader *vs_sh
struct radv_userdata_info *loc =
&vs_shader->info.user_sgprs_locs.shader_data[AC_UD_VS_PROLOG_INPUTS];
uint32_t base_reg = cmd_buffer->state.pipeline->user_data_0[MESA_SHADER_VERTEX];
fprintf(stderr, "prolog_jump_va: 0x%lx - base_reg: %u - sh_offset: %u\n", input_va, base_reg, base_reg + loc->sgpr_idx * 4);
assert(loc->sgpr_idx != -1);
assert(loc->num_sgprs == 2);
radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + loc->sgpr_idx * 4,
......
......@@ -2801,14 +2801,14 @@ radv_device_init_vs_prologs(struct radv_device *device)
key.next_stage = MESA_SHADER_VERTEX;
key.wave32 = device->physical_device->ge_wave_size == 32;
for (unsigned i = 1; i <= MAX_VERTEX_ATTRIBS; i++) {
for (unsigned i = 0; i < MAX_VERTEX_ATTRIBS; i++) {
state.attribute_mask = BITFIELD_MASK(i);
state.instance_rate_inputs = 0;
key.num_attributes = i;
device->simple_vs_prologs[i - 1] = radv_create_vs_prolog(device, &key);
if (!device->simple_vs_prologs[i - 1])
device->simple_vs_prologs[i] = radv_create_vs_prolog(device, &key);
if (!device->simple_vs_prologs[i])
return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
}
......
......@@ -42,6 +42,7 @@
#include "ac_nir.h"
#include "ac_rtld.h"
#include "aco_interface.h"
#include "shader_enums.h"
#include "sid.h"
#include "vk_format.h"
......@@ -513,6 +514,63 @@ radv_force_primitive_shading_rate(nir_shader *nir, struct radv_device *device)
return progress;
}
/*
 * Embedded SPIR-V binary used as a debugging stand-in for the vertex stage:
 * radv_shader_compile_to_nir() substitutes it for the application's module
 * data when stage == MESA_SHADER_VERTEX.
 *
 * The blob starts with the SPIR-V magic number (bytes 03 02 23 07, i.e.
 * 0x07230203 little-endian), declares an entry point named "main" and a
 * gl_PerVertex block (gl_Position, gl_PointSize, gl_ClipDistance), and
 * appears to store the constant vec4(0, 0, 0, 1) to gl_Position.
 *
 * The array is consumed through a uint32_t pointer, so it is explicitly
 * aligned to uint32_t; a plain unsigned char array carries no such guarantee.
 */
static _Alignas(uint32_t) unsigned char test_spv[] = {
   0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x0a, 0x00, 0x08, 0x00,
   0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00,
   0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00,
   0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30,
   0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00,
   0x01, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00,
   0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00,
   0x0d, 0x00, 0x00, 0x00, 0x03, 0x00, 0x03, 0x00, 0x02, 0x00, 0x00, 0x00,
   0xae, 0x01, 0x00, 0x00, 0x05, 0x00, 0x04, 0x00, 0x04, 0x00, 0x00, 0x00,
   0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x06, 0x00,
   0x0b, 0x00, 0x00, 0x00, 0x67, 0x6c, 0x5f, 0x50, 0x65, 0x72, 0x56, 0x65,
   0x72, 0x74, 0x65, 0x78, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x06, 0x00,
   0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x67, 0x6c, 0x5f, 0x50,
   0x6f, 0x73, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x00, 0x06, 0x00, 0x07, 0x00,
   0x0b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x67, 0x6c, 0x5f, 0x50,
   0x6f, 0x69, 0x6e, 0x74, 0x53, 0x69, 0x7a, 0x65, 0x00, 0x00, 0x00, 0x00,
   0x06, 0x00, 0x07, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
   0x67, 0x6c, 0x5f, 0x43, 0x6c, 0x69, 0x70, 0x44, 0x69, 0x73, 0x74, 0x61,
   0x6e, 0x63, 0x65, 0x00, 0x05, 0x00, 0x03, 0x00, 0x0d, 0x00, 0x00, 0x00,
   0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x00, 0x00,
   0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
   0x48, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
   0x0b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00,
   0x0b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
   0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x0b, 0x00, 0x00, 0x00,
   0x02, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00,
   0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
   0x16, 0x00, 0x03, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
   0x17, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
   0x04, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00,
   0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00,
   0x08, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
   0x1c, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
   0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x00, 0x00,
   0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00,
   0x20, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
   0x0b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
   0x0d, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
   0x0e, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
   0x2b, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00,
   0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
   0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00,
   0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f,
   0x2c, 0x00, 0x07, 0x00, 0x07, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00,
   0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
   0x11, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00,
   0x03, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00,
   0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
   0x03, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00,
   0x41, 0x00, 0x05, 0x00, 0x13, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
   0x0d, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00,
   0x14, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00,
   0x38, 0x00, 0x01, 0x00
};
/* Size of test_spv in bytes (628); derived from the array so the two cannot
 * drift apart if the blob is regenerated. */
unsigned int test_spv_len = sizeof(test_spv);
nir_shader *
radv_shader_compile_to_nir(struct radv_device *device, struct vk_shader_module *module,
const char *entrypoint_name, gl_shader_stage stage,
......@@ -520,6 +578,13 @@ radv_shader_compile_to_nir(struct radv_device *device, struct vk_shader_module *
const struct radv_pipeline_layout *layout,
const struct radv_pipeline_key *key)
{
char *data = (char *)module->data;
uint32_t size = module->size;
if (stage == MESA_SHADER_VERTEX) {
data = (char *)test_spv;
size = test_spv_len;
}
unsigned subgroup_size = 64, ballot_bit_size = 64;
if (key->cs.compute_subgroup_size) {
/* Only compute shaders currently support requiring a
......@@ -543,11 +608,11 @@ radv_shader_compile_to_nir(struct radv_device *device, struct vk_shader_module *
assert(exec_list_length(&nir->functions) == 1);
} else {
uint32_t *spirv = (uint32_t *)module->data;
assert(module->size % 4 == 0);
uint32_t *spirv = (uint32_t *)data;
assert(size % 4 == 0);
if (device->instance->debug_flags & RADV_DEBUG_DUMP_SPIRV)
radv_print_spirv(module->data, module->size, stderr);
radv_print_spirv(data, size, stderr);
uint32_t num_spec_entries = 0;
struct nir_spirv_specialization *spec_entries =
......@@ -632,7 +697,7 @@ radv_shader_compile_to_nir(struct radv_device *device, struct vk_shader_module *
.private_data = &spirv_debug_data,
},
};
nir = spirv_to_nir(spirv, module->size / 4, spec_entries, num_spec_entries, stage,
nir = spirv_to_nir(spirv, size / 4, spec_entries, num_spec_entries, stage,
entrypoint_name, &spirv_options,
&device->physical_device->nir_options[stage]);
assert(nir->info.stage == stage);
......@@ -1349,6 +1414,7 @@ get_hole(struct radv_shader_arena *arena, struct list_head *head)
void
radv_free_shader_memory(struct radv_device *device, union radv_shader_arena_block *alloc)
{
return;
mtx_lock(&device->shader_arena_mutex);
union radv_shader_arena_block *hole_prev = get_hole(alloc->arena, alloc->list.prev);
......@@ -2184,6 +2250,7 @@ radv_create_vs_prolog(struct radv_device *device, const struct radv_vs_prolog_ke
void
radv_shader_destroy(struct radv_device *device, struct radv_shader *shader)
{
return;
if (!p_atomic_dec_zero(&shader->ref_count))
return;
......@@ -2198,6 +2265,7 @@ radv_shader_destroy(struct radv_device *device, struct radv_shader *shader)
void
radv_prolog_destroy(struct radv_device *device, struct radv_shader_prolog *prolog)
{
return;
if (!prolog)
return;
......
......@@ -641,7 +641,7 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
}
if (nir->info.stage == MESA_SHADER_VERTEX) {
if (pipeline_key->vs.dynamic_input_state && nir->info.inputs_read) {
if (pipeline_key->vs.dynamic_input_state) {// && nir->info.inputs_read) {
info->vs.has_prolog = true;
info->vs.dynamic_inputs = true;
}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment