Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Joshua Ashton
mesa
Commits
fec65187
Commit
fec65187
authored
Mar 16, 2022
by
Joshua Ashton
🐸
Browse files
prolog debugging
parent
be9023e4
Pipeline
#533761
waiting for manual action with stages
Changes
5
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
src/amd/compiler/aco_instruction_selection.cpp
View file @
fec65187
...
...
@@ -12015,8 +12015,6 @@ select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shad
const struct radv_shader_info* info,
const struct radv_shader_args* args, unsigned* num_preserved_sgprs)
{
assert(key->num_attributes > 0);
/* This should be enough for any shader/stage. */
unsigned max_user_sgprs = options->chip_class >= GFX9 ? 32 : 16;
*num_preserved_sgprs = max_user_sgprs + 14;
...
...
@@ -12032,7 +12030,7 @@ select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shad
Builder bld(program, block);
block->instructions.reserve(16 + key->num_attributes * 4);
block->instructions.reserve(16 +
MAX2(
key->num_attributes
, 1)
* 4);
bld.sopp(aco_opcode::s_setprio, -1u, 0x3u);
...
...
@@ -12053,13 +12051,14 @@ select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shad
PhysReg attributes_start(256 + args->ac.num_vgprs_used);
/* choose vgprs that won't be used for anything else until the last attribute load */
PhysReg vertex_index(attributes_start.reg() + key->num_attributes * 4 - 1);
PhysReg instance_index(attributes_start.reg() + key->num_attributes * 4 - 2);
PhysReg start_instance_vgpr(attributes_start.reg() + key->num_attributes * 4 - 3);
PhysReg nontrivial_tmp_vgpr0(attributes_start.reg() + key->num_attributes * 4 - 4);
PhysReg nontrivial_tmp_vgpr1(attributes_start.reg() + key->num_attributes * 4);
bld.sop1(aco_opcode::s_mov_b32, Definition(vertex_buffers, s1),
PhysReg vertex_index(attributes_start.reg() + MAX2(key->num_attributes, 1) * 4 - 1);
PhysReg instance_index(attributes_start.reg() + MAX2(key->num_attributes, 1) * 4 - 2);
PhysReg start_instance_vgpr(attributes_start.reg() + MAX2(key->num_attributes, 1) * 4 - 3);
PhysReg nontrivial_tmp_vgpr0(attributes_start.reg() + MAX2(key->num_attributes, 1) * 4 - 4);
PhysReg nontrivial_tmp_vgpr1(attributes_start.reg() + MAX2(key->num_attributes, 1) * 4);
if (key->num_attributes)
bld.sop1(aco_opcode::s_mov_b32, Definition(vertex_buffers, s1),
get_arg_fixed(args, args->ac.vertex_buffers));
if (options->address32_hi >= 0xffff8000 || options->address32_hi <= 0x7fff) {
bld.sopk(aco_opcode::s_movk_i32, Definition(vertex_buffers.advance(4), s1),
...
...
@@ -12071,7 +12070,7 @@ select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shad
/* calculate vgpr requirements */
unsigned num_vgprs = attributes_start.reg() - 256;
num_vgprs += key->num_attributes * 4;
num_vgprs +=
MAX2(
key->num_attributes
, 1)
* 4;
if (has_nontrivial_divisors && program->chip_class <= GFX8)
num_vgprs++; /* make space for nontrivial_tmp_vgpr1 */
unsigned num_sgprs = 0;
...
...
src/amd/vulkan/radv_cmd_buffer.c
View file @
fec65187
...
...
@@ -2711,7 +2711,7 @@ radv_instance_rate_prolog_index(unsigned num_attributes, uint32_t instance_rate_
/* From total number of attributes to offset. */
static
const
uint16_t
total_to_offset
[
16
]
=
{
0
,
1
,
4
,
10
,
20
,
35
,
56
,
84
,
120
,
165
,
220
,
286
,
364
,
455
,
560
,
680
};
unsigned
start_index
=
total_to_offset
[
num_attributes
-
1
];
unsigned
start_index
=
total_to_offset
[
num_attributes
];
/* From number of instanced attributes to offset. This would require a different LUT depending on
* the total number of attributes, but we can exploit a pattern to use just the LUT for 16 total
...
...
@@ -2794,7 +2794,7 @@ lookup_vs_prolog(struct radv_cmd_buffer *cmd_buffer, struct radv_shader *vs_shad
(
!
vs_shader
->
info
.
vs
.
as_ls
||
!
instance_rate_inputs
)
&&
!
misaligned_mask
&&
!
state
->
alpha_adjust_lo
&&
!
state
->
alpha_adjust_hi
)
{
if
(
!
instance_rate_inputs
)
{
prolog
=
device
->
simple_vs_prologs
[
num_attributes
-
1
];
prolog
=
device
->
simple_vs_prologs
[
num_attributes
];
}
else
if
(
num_attributes
<=
16
&&
!*
nontrivial_divisors
&&
util_bitcount
(
instance_rate_inputs
)
==
(
util_last_bit
(
instance_rate_inputs
)
-
ffs
(
instance_rate_inputs
)
+
1
))
{
...
...
@@ -2994,6 +2994,7 @@ emit_prolog_inputs(struct radv_cmd_buffer *cmd_buffer, struct radv_shader *vs_sh
struct
radv_userdata_info
*
loc
=
&
vs_shader
->
info
.
user_sgprs_locs
.
shader_data
[
AC_UD_VS_PROLOG_INPUTS
];
uint32_t
base_reg
=
cmd_buffer
->
state
.
pipeline
->
user_data_0
[
MESA_SHADER_VERTEX
];
fprintf
(
stderr
,
"prolog_jump_va: 0x%lx - base_reg: %u - sh_offset: %u
\n
"
,
input_va
,
base_reg
,
base_reg
+
loc
->
sgpr_idx
*
4
);
assert
(
loc
->
sgpr_idx
!=
-
1
);
assert
(
loc
->
num_sgprs
==
2
);
radv_emit_shader_pointer
(
cmd_buffer
->
device
,
cmd_buffer
->
cs
,
base_reg
+
loc
->
sgpr_idx
*
4
,
...
...
src/amd/vulkan/radv_device.c
View file @
fec65187
...
...
@@ -2801,14 +2801,14 @@ radv_device_init_vs_prologs(struct radv_device *device)
key
.
next_stage
=
MESA_SHADER_VERTEX
;
key
.
wave32
=
device
->
physical_device
->
ge_wave_size
==
32
;
for
(
unsigned
i
=
1
;
i
<
=
MAX_VERTEX_ATTRIBS
;
i
++
)
{
for
(
unsigned
i
=
0
;
i
<
MAX_VERTEX_ATTRIBS
;
i
++
)
{
state
.
attribute_mask
=
BITFIELD_MASK
(
i
);
state
.
instance_rate_inputs
=
0
;
key
.
num_attributes
=
i
;
device
->
simple_vs_prologs
[
i
-
1
]
=
radv_create_vs_prolog
(
device
,
&
key
);
if
(
!
device
->
simple_vs_prologs
[
i
-
1
])
device
->
simple_vs_prologs
[
i
]
=
radv_create_vs_prolog
(
device
,
&
key
);
if
(
!
device
->
simple_vs_prologs
[
i
])
return
vk_error
(
device
->
physical_device
->
instance
,
VK_ERROR_OUT_OF_DEVICE_MEMORY
);
}
...
...
src/amd/vulkan/radv_shader.c
View file @
fec65187
...
...
@@ -42,6 +42,7 @@
#include
"ac_nir.h"
#include
"ac_rtld.h"
#include
"aco_interface.h"
#include
"shader_enums.h"
#include
"sid.h"
#include
"vk_format.h"
...
...
@@ -513,6 +514,63 @@ radv_force_primitive_shading_rate(nir_shader *nir, struct radv_device *device)
return
progress
;
}
static
unsigned
char
test_spv
[]
=
{
0x03
,
0x02
,
0x23
,
0x07
,
0x00
,
0x00
,
0x01
,
0x00
,
0x0a
,
0x00
,
0x08
,
0x00
,
0x15
,
0x00
,
0x00
,
0x00
,
0x00
,
0x00
,
0x00
,
0x00
,
0x11
,
0x00
,
0x02
,
0x00
,
0x01
,
0x00
,
0x00
,
0x00
,
0x0b
,
0x00
,
0x06
,
0x00
,
0x01
,
0x00
,
0x00
,
0x00
,
0x47
,
0x4c
,
0x53
,
0x4c
,
0x2e
,
0x73
,
0x74
,
0x64
,
0x2e
,
0x34
,
0x35
,
0x30
,
0x00
,
0x00
,
0x00
,
0x00
,
0x0e
,
0x00
,
0x03
,
0x00
,
0x00
,
0x00
,
0x00
,
0x00
,
0x01
,
0x00
,
0x00
,
0x00
,
0x0f
,
0x00
,
0x06
,
0x00
,
0x00
,
0x00
,
0x00
,
0x00
,
0x04
,
0x00
,
0x00
,
0x00
,
0x6d
,
0x61
,
0x69
,
0x6e
,
0x00
,
0x00
,
0x00
,
0x00
,
0x0d
,
0x00
,
0x00
,
0x00
,
0x03
,
0x00
,
0x03
,
0x00
,
0x02
,
0x00
,
0x00
,
0x00
,
0xae
,
0x01
,
0x00
,
0x00
,
0x05
,
0x00
,
0x04
,
0x00
,
0x04
,
0x00
,
0x00
,
0x00
,
0x6d
,
0x61
,
0x69
,
0x6e
,
0x00
,
0x00
,
0x00
,
0x00
,
0x05
,
0x00
,
0x06
,
0x00
,
0x0b
,
0x00
,
0x00
,
0x00
,
0x67
,
0x6c
,
0x5f
,
0x50
,
0x65
,
0x72
,
0x56
,
0x65
,
0x72
,
0x74
,
0x65
,
0x78
,
0x00
,
0x00
,
0x00
,
0x00
,
0x06
,
0x00
,
0x06
,
0x00
,
0x0b
,
0x00
,
0x00
,
0x00
,
0x00
,
0x00
,
0x00
,
0x00
,
0x67
,
0x6c
,
0x5f
,
0x50
,
0x6f
,
0x73
,
0x69
,
0x74
,
0x69
,
0x6f
,
0x6e
,
0x00
,
0x06
,
0x00
,
0x07
,
0x00
,
0x0b
,
0x00
,
0x00
,
0x00
,
0x01
,
0x00
,
0x00
,
0x00
,
0x67
,
0x6c
,
0x5f
,
0x50
,
0x6f
,
0x69
,
0x6e
,
0x74
,
0x53
,
0x69
,
0x7a
,
0x65
,
0x00
,
0x00
,
0x00
,
0x00
,
0x06
,
0x00
,
0x07
,
0x00
,
0x0b
,
0x00
,
0x00
,
0x00
,
0x02
,
0x00
,
0x00
,
0x00
,
0x67
,
0x6c
,
0x5f
,
0x43
,
0x6c
,
0x69
,
0x70
,
0x44
,
0x69
,
0x73
,
0x74
,
0x61
,
0x6e
,
0x63
,
0x65
,
0x00
,
0x05
,
0x00
,
0x03
,
0x00
,
0x0d
,
0x00
,
0x00
,
0x00
,
0x00
,
0x00
,
0x00
,
0x00
,
0x48
,
0x00
,
0x05
,
0x00
,
0x0b
,
0x00
,
0x00
,
0x00
,
0x00
,
0x00
,
0x00
,
0x00
,
0x0b
,
0x00
,
0x00
,
0x00
,
0x00
,
0x00
,
0x00
,
0x00
,
0x48
,
0x00
,
0x05
,
0x00
,
0x0b
,
0x00
,
0x00
,
0x00
,
0x01
,
0x00
,
0x00
,
0x00
,
0x0b
,
0x00
,
0x00
,
0x00
,
0x01
,
0x00
,
0x00
,
0x00
,
0x48
,
0x00
,
0x05
,
0x00
,
0x0b
,
0x00
,
0x00
,
0x00
,
0x02
,
0x00
,
0x00
,
0x00
,
0x0b
,
0x00
,
0x00
,
0x00
,
0x03
,
0x00
,
0x00
,
0x00
,
0x47
,
0x00
,
0x03
,
0x00
,
0x0b
,
0x00
,
0x00
,
0x00
,
0x02
,
0x00
,
0x00
,
0x00
,
0x13
,
0x00
,
0x02
,
0x00
,
0x02
,
0x00
,
0x00
,
0x00
,
0x21
,
0x00
,
0x03
,
0x00
,
0x03
,
0x00
,
0x00
,
0x00
,
0x02
,
0x00
,
0x00
,
0x00
,
0x16
,
0x00
,
0x03
,
0x00
,
0x06
,
0x00
,
0x00
,
0x00
,
0x20
,
0x00
,
0x00
,
0x00
,
0x17
,
0x00
,
0x04
,
0x00
,
0x07
,
0x00
,
0x00
,
0x00
,
0x06
,
0x00
,
0x00
,
0x00
,
0x04
,
0x00
,
0x00
,
0x00
,
0x15
,
0x00
,
0x04
,
0x00
,
0x08
,
0x00
,
0x00
,
0x00
,
0x20
,
0x00
,
0x00
,
0x00
,
0x00
,
0x00
,
0x00
,
0x00
,
0x2b
,
0x00
,
0x04
,
0x00
,
0x08
,
0x00
,
0x00
,
0x00
,
0x09
,
0x00
,
0x00
,
0x00
,
0x01
,
0x00
,
0x00
,
0x00
,
0x1c
,
0x00
,
0x04
,
0x00
,
0x0a
,
0x00
,
0x00
,
0x00
,
0x06
,
0x00
,
0x00
,
0x00
,
0x09
,
0x00
,
0x00
,
0x00
,
0x1e
,
0x00
,
0x05
,
0x00
,
0x0b
,
0x00
,
0x00
,
0x00
,
0x07
,
0x00
,
0x00
,
0x00
,
0x06
,
0x00
,
0x00
,
0x00
,
0x0a
,
0x00
,
0x00
,
0x00
,
0x20
,
0x00
,
0x04
,
0x00
,
0x0c
,
0x00
,
0x00
,
0x00
,
0x03
,
0x00
,
0x00
,
0x00
,
0x0b
,
0x00
,
0x00
,
0x00
,
0x3b
,
0x00
,
0x04
,
0x00
,
0x0c
,
0x00
,
0x00
,
0x00
,
0x0d
,
0x00
,
0x00
,
0x00
,
0x03
,
0x00
,
0x00
,
0x00
,
0x15
,
0x00
,
0x04
,
0x00
,
0x0e
,
0x00
,
0x00
,
0x00
,
0x20
,
0x00
,
0x00
,
0x00
,
0x01
,
0x00
,
0x00
,
0x00
,
0x2b
,
0x00
,
0x04
,
0x00
,
0x0e
,
0x00
,
0x00
,
0x00
,
0x0f
,
0x00
,
0x00
,
0x00
,
0x00
,
0x00
,
0x00
,
0x00
,
0x2b
,
0x00
,
0x04
,
0x00
,
0x06
,
0x00
,
0x00
,
0x00
,
0x10
,
0x00
,
0x00
,
0x00
,
0x00
,
0x00
,
0x00
,
0x00
,
0x2b
,
0x00
,
0x04
,
0x00
,
0x06
,
0x00
,
0x00
,
0x00
,
0x11
,
0x00
,
0x00
,
0x00
,
0x00
,
0x00
,
0x80
,
0x3f
,
0x2c
,
0x00
,
0x07
,
0x00
,
0x07
,
0x00
,
0x00
,
0x00
,
0x12
,
0x00
,
0x00
,
0x00
,
0x10
,
0x00
,
0x00
,
0x00
,
0x10
,
0x00
,
0x00
,
0x00
,
0x10
,
0x00
,
0x00
,
0x00
,
0x11
,
0x00
,
0x00
,
0x00
,
0x20
,
0x00
,
0x04
,
0x00
,
0x13
,
0x00
,
0x00
,
0x00
,
0x03
,
0x00
,
0x00
,
0x00
,
0x07
,
0x00
,
0x00
,
0x00
,
0x36
,
0x00
,
0x05
,
0x00
,
0x02
,
0x00
,
0x00
,
0x00
,
0x04
,
0x00
,
0x00
,
0x00
,
0x00
,
0x00
,
0x00
,
0x00
,
0x03
,
0x00
,
0x00
,
0x00
,
0xf8
,
0x00
,
0x02
,
0x00
,
0x05
,
0x00
,
0x00
,
0x00
,
0x41
,
0x00
,
0x05
,
0x00
,
0x13
,
0x00
,
0x00
,
0x00
,
0x14
,
0x00
,
0x00
,
0x00
,
0x0d
,
0x00
,
0x00
,
0x00
,
0x0f
,
0x00
,
0x00
,
0x00
,
0x3e
,
0x00
,
0x03
,
0x00
,
0x14
,
0x00
,
0x00
,
0x00
,
0x12
,
0x00
,
0x00
,
0x00
,
0xfd
,
0x00
,
0x01
,
0x00
,
0x38
,
0x00
,
0x01
,
0x00
};
unsigned
int
test_spv_len
=
628
;
nir_shader
*
radv_shader_compile_to_nir
(
struct
radv_device
*
device
,
struct
vk_shader_module
*
module
,
const
char
*
entrypoint_name
,
gl_shader_stage
stage
,
...
...
@@ -520,6 +578,13 @@ radv_shader_compile_to_nir(struct radv_device *device, struct vk_shader_module *
const
struct
radv_pipeline_layout
*
layout
,
const
struct
radv_pipeline_key
*
key
)
{
char
*
data
=
(
char
*
)
module
->
data
;
uint32_t
size
=
module
->
size
;
if
(
stage
==
MESA_SHADER_VERTEX
)
{
data
=
(
char
*
)
test_spv
;
size
=
test_spv_len
;
}
unsigned
subgroup_size
=
64
,
ballot_bit_size
=
64
;
if
(
key
->
cs
.
compute_subgroup_size
)
{
/* Only compute shaders currently support requiring a
...
...
@@ -543,11 +608,11 @@ radv_shader_compile_to_nir(struct radv_device *device, struct vk_shader_module *
assert
(
exec_list_length
(
&
nir
->
functions
)
==
1
);
}
else
{
uint32_t
*
spirv
=
(
uint32_t
*
)
module
->
data
;
assert
(
module
->
size
%
4
==
0
);
uint32_t
*
spirv
=
(
uint32_t
*
)
data
;
assert
(
size
%
4
==
0
);
if
(
device
->
instance
->
debug_flags
&
RADV_DEBUG_DUMP_SPIRV
)
radv_print_spirv
(
module
->
data
,
module
->
size
,
stderr
);
radv_print_spirv
(
data
,
size
,
stderr
);
uint32_t
num_spec_entries
=
0
;
struct
nir_spirv_specialization
*
spec_entries
=
...
...
@@ -632,7 +697,7 @@ radv_shader_compile_to_nir(struct radv_device *device, struct vk_shader_module *
.
private_data
=
&
spirv_debug_data
,
},
};
nir
=
spirv_to_nir
(
spirv
,
module
->
size
/
4
,
spec_entries
,
num_spec_entries
,
stage
,
nir
=
spirv_to_nir
(
spirv
,
size
/
4
,
spec_entries
,
num_spec_entries
,
stage
,
entrypoint_name
,
&
spirv_options
,
&
device
->
physical_device
->
nir_options
[
stage
]);
assert
(
nir
->
info
.
stage
==
stage
);
...
...
@@ -1349,6 +1414,7 @@ get_hole(struct radv_shader_arena *arena, struct list_head *head)
void
radv_free_shader_memory
(
struct
radv_device
*
device
,
union
radv_shader_arena_block
*
alloc
)
{
return
;
mtx_lock
(
&
device
->
shader_arena_mutex
);
union
radv_shader_arena_block
*
hole_prev
=
get_hole
(
alloc
->
arena
,
alloc
->
list
.
prev
);
...
...
@@ -2184,6 +2250,7 @@ radv_create_vs_prolog(struct radv_device *device, const struct radv_vs_prolog_ke
void
radv_shader_destroy
(
struct
radv_device
*
device
,
struct
radv_shader
*
shader
)
{
return
;
if
(
!
p_atomic_dec_zero
(
&
shader
->
ref_count
))
return
;
...
...
@@ -2198,6 +2265,7 @@ radv_shader_destroy(struct radv_device *device, struct radv_shader *shader)
void
radv_prolog_destroy
(
struct
radv_device
*
device
,
struct
radv_shader_prolog
*
prolog
)
{
return
;
if
(
!
prolog
)
return
;
...
...
src/amd/vulkan/radv_shader_info.c
View file @
fec65187
...
...
@@ -641,7 +641,7 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
}
if
(
nir
->
info
.
stage
==
MESA_SHADER_VERTEX
)
{
if
(
pipeline_key
->
vs
.
dynamic_input_state
&&
nir
->
info
.
inputs_read
)
{
if
(
pipeline_key
->
vs
.
dynamic_input_state
)
{
//
&& nir->info.inputs_read) {
info
->
vs
.
has_prolog
=
true
;
info
->
vs
.
dynamic_inputs
=
true
;
}
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment