Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Rhys Perry
mesa
Commits
8c79f9ab
Commit
8c79f9ab
authored
May 12, 2022
by
Rhys Perry
Browse files
clang-format RADV/ACO
parent
43d0e85f
Pipeline
#584311
waiting for manual action with stages
Changes
18
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
src/amd/compiler/aco_instruction_selection.cpp
View file @
8c79f9ab
...
...
@@ -253,7 +253,8 @@ emit_masked_swizzle(isel_context* ctx, Builder& bld, Temp src, unsigned mask)
dpp_ctrl = dpp_row_mirror;
} else if (and_mask == 0x1f && !or_mask && xor_mask == 0x7) {
dpp_ctrl = dpp_row_half_mirror;
} else if (ctx->options->gfx_level >= GFX10 && (and_mask & 0x18) == 0x18 && or_mask < 8 && xor_mask < 8) {
} else if (ctx->options->gfx_level >= GFX10 && (and_mask & 0x18) == 0x18 && or_mask < 8 &&
xor_mask < 8) {
// DPP8 comes last, as it does not allow several modifiers like `abs` that are available with DPP16
Builder::Result ret = bld.vop1_dpp8(aco_opcode::v_mov_b32, bld.def(v1), src);
for (unsigned i = 0; i < 8; i++) {
...
...
@@ -11525,8 +11526,8 @@ select_trap_handler_shader(Program* program, struct nir_shader* shader, ac_shade
{
assert(options->gfx_level == GFX8);
init_program(program, compute_cs, info, options->gfx_level,
options->family, options->wgp_mode,
config);
init_program(program, compute_cs, info, options->gfx_level,
options->family, options->wgp_mode,
config);
isel_context ctx = {};
ctx.program = program;
...
...
@@ -11693,8 +11694,8 @@ select_vs_prolog(Program* program, const struct aco_vs_prolog_key* key, ac_shade
unsigned max_user_sgprs = options->gfx_level >= GFX9 ? 32 : 16;
*num_preserved_sgprs = max_user_sgprs + 14;
init_program(program, compute_cs, info, options->gfx_level,
options->family, options->wgp_mode,
config);
init_program(program, compute_cs, info, options->gfx_level,
options->family, options->wgp_mode,
config);
Block* block = program->create_and_insert_block();
block->kind = block_kind_top_level;
...
...
src/amd/compiler/aco_instruction_selection_setup.cpp
View file @
8c79f9ab
...
...
@@ -907,8 +907,8 @@ setup_isel_context(Program* program, unsigned shader_count, struct nir_shader* c
else
unreachable
(
"Shader stage not implemented"
);
init_program
(
program
,
Stage
{
hw_stage
,
sw_stage
},
info
,
options
->
gfx_level
,
options
->
family
,
options
->
wgp_mode
,
config
);
init_program
(
program
,
Stage
{
hw_stage
,
sw_stage
},
info
,
options
->
gfx_level
,
options
->
family
,
options
->
wgp_mode
,
config
);
isel_context
ctx
=
{};
ctx
.
program
=
program
;
...
...
src/amd/compiler/aco_ir.cpp
View file @
8c79f9ab
...
...
@@ -89,8 +89,7 @@ init_program(Program* program, Stage stage, const struct aco_shader_info* info,
program
->
lane_mask
=
program
->
wave_size
==
32
?
s1
:
s2
;
program
->
dev
.
lds_encoding_granule
=
gfx_level
>=
GFX7
?
512
:
256
;
program
->
dev
.
lds_alloc_granule
=
gfx_level
>=
GFX10_3
?
1024
:
program
->
dev
.
lds_encoding_granule
;
program
->
dev
.
lds_alloc_granule
=
gfx_level
>=
GFX10_3
?
1024
:
program
->
dev
.
lds_encoding_granule
;
program
->
dev
.
lds_limit
=
gfx_level
>=
GFX7
?
65536
:
32768
;
/* apparently gfx702 also has 16-bank LDS but I can't find a family for that */
program
->
dev
.
has_16bank_lds
=
family
==
CHIP_KABINI
||
family
==
CHIP_STONEY
;
...
...
src/amd/compiler/aco_optimizer.cpp
View file @
8c79f9ab
...
...
@@ -3942,9 +3942,9 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
mad_op
=
emit_fma
?
aco_opcode
::
v_fma_legacy_f32
:
aco_opcode
::
v_mad_legacy_f32
;
}
else
if
(
mad16
)
{
mad_op
=
emit_fma
?
(
ctx
.
program
->
gfx_level
==
GFX8
?
aco_opcode
::
v_fma_legacy_f16
:
aco_opcode
::
v_fma_f16
)
:
aco_opcode
::
v_fma_f16
)
:
(
ctx
.
program
->
gfx_level
==
GFX8
?
aco_opcode
::
v_mad_legacy_f16
:
aco_opcode
::
v_mad_f16
);
:
aco_opcode
::
v_mad_f16
);
}
else
if
(
mad64
)
{
mad_op
=
aco_opcode
::
v_fma_f64
;
}
...
...
src/amd/compiler/aco_print_asm.cpp
View file @
8c79f9ab
...
...
@@ -268,8 +268,8 @@ fail:
#ifdef LLVM_AVAILABLE
std
::
pair
<
bool
,
size_t
>
disasm_instr
(
amd_gfx_level
gfx_level
,
LLVMDisasmContextRef
disasm
,
uint32_t
*
binary
,
unsigned
exec_size
,
size_t
pos
,
char
*
outline
,
unsigned
outline_size
)
disasm_instr
(
amd_gfx_level
gfx_level
,
LLVMDisasmContextRef
disasm
,
uint32_t
*
binary
,
unsigned
exec_size
,
size_t
pos
,
char
*
outline
,
unsigned
outline_size
)
{
size_t
l
=
LLVMDisasmInstruction
(
disasm
,
(
uint8_t
*
)
&
binary
[
pos
],
(
exec_size
-
pos
)
*
sizeof
(
uint32_t
),
...
...
@@ -284,14 +284,17 @@ disasm_instr(amd_gfx_level gfx_level, LLVMDisasmContextRef disasm, uint32_t* bin
bool
invalid
=
false
;
size_t
size
;
if
(
!
l
&&
((
gfx_level
>=
GFX9
&&
(
binary
[
pos
]
&
0xffff8000
)
==
0xd1348000
)
||
/* v_add_u32_e64 + clamp */
(
gfx_level
>=
GFX10
&&
(
binary
[
pos
]
&
0xffff8000
)
==
0xd7038000
)
||
/* v_add_u16_e64 + clamp */
(
gfx_level
<=
GFX9
&&
(
binary
[
pos
]
&
0xffff8000
)
==
0xd1268000
)
||
/* v_add_u16_e64 + clamp */
((
gfx_level
>=
GFX9
&&
(
binary
[
pos
]
&
0xffff8000
)
==
0xd1348000
)
||
/* v_add_u32_e64 + clamp */
(
gfx_level
>=
GFX10
&&
(
binary
[
pos
]
&
0xffff8000
)
==
0xd7038000
)
||
/* v_add_u16_e64 + clamp */
(
gfx_level
<=
GFX9
&&
(
binary
[
pos
]
&
0xffff8000
)
==
0xd1268000
)
||
/* v_add_u16_e64 + clamp */
(
gfx_level
>=
GFX10
&&
(
binary
[
pos
]
&
0xffff8000
)
==
0xd76d8000
)
||
/* v_add3_u32 + clamp */
(
gfx_level
==
GFX9
&&
(
binary
[
pos
]
&
0xffff8000
)
==
0xd1ff8000
))
/* v_add3_u32 + clamp */
)
{
strcpy
(
outline
,
"
\t
integer addition + clamp"
);
bool
has_literal
=
gfx_level
>=
GFX10
&&
(((
binary
[
pos
+
1
]
&
0x1ff
)
==
0xff
)
||
(((
binary
[
pos
+
1
]
>>
9
)
&
0x1ff
)
==
0xff
));
(((
binary
[
pos
+
1
]
>>
9
)
&
0x1ff
)
==
0xff
));
size
=
2
+
has_literal
;
}
else
if
(
gfx_level
>=
GFX10
&&
l
==
4
&&
((
binary
[
pos
]
&
0xfe0001ff
)
==
0x020000f9
))
{
strcpy
(
outline
,
"
\t
v_cndmask_b32 + sdwa"
);
...
...
src/amd/compiler/aco_register_allocation.cpp
View file @
8c79f9ab
...
...
@@ -490,8 +490,8 @@ print_regs(ra_ctx& ctx, bool vgprs, RegisterFile& reg_file)
}
unsigned
get_subdword_operand_stride
(
amd_gfx_level
gfx_level
,
const
aco_ptr
<
Instruction
>&
instr
,
unsigned
idx
,
RegClass
rc
)
get_subdword_operand_stride
(
amd_gfx_level
gfx_level
,
const
aco_ptr
<
Instruction
>&
instr
,
unsigned
idx
,
RegClass
rc
)
{
if
(
instr
->
isPseudo
())
{
/* v_readfirstlane_b32 cannot use SDWA */
...
...
src/amd/compiler/aco_validate.cpp
View file @
8c79f9ab
...
...
@@ -156,8 +156,8 @@ validate_ir(Program* program)
check
(
program
->
gfx_level
>=
GFX8
,
"SDWA is GFX8+ only"
,
instr
.
get
());
SDWA_instruction
&
sdwa
=
instr
->
sdwa
();
check
(
sdwa
.
omod
==
0
||
program
->
gfx_level
>=
GFX9
,
"SDWA omod only supported on GFX9+"
,
instr
.
get
());
check
(
sdwa
.
omod
==
0
||
program
->
gfx_level
>=
GFX9
,
"SDWA omod only supported on GFX9+"
,
instr
.
get
());
if
(
base_format
==
Format
::
VOPC
)
{
check
(
sdwa
.
clamp
==
false
||
program
->
gfx_level
==
GFX8
,
"SDWA VOPC clamp only supported on GFX8"
,
instr
.
get
());
...
...
@@ -224,8 +224,8 @@ validate_ir(Program* program)
/* check opsel */
if
(
instr
->
isVOP3
())
{
VOP3_instruction
&
vop3
=
instr
->
vop3
();
check
(
vop3
.
opsel
==
0
||
program
->
gfx_level
>=
GFX9
,
"Opsel is only supported on GFX9+"
,
instr
.
get
());
check
(
vop3
.
opsel
==
0
||
program
->
gfx_level
>=
GFX9
,
"Opsel is only supported on GFX9+"
,
instr
.
get
());
for
(
unsigned
i
=
0
;
i
<
3
;
i
++
)
{
if
(
i
>=
instr
->
operands
.
size
()
||
...
...
@@ -762,7 +762,8 @@ ra_fail(Program* program, Location loc, Location loc2, const char* fmt, ...)
}
bool
validate_subdword_operand
(
amd_gfx_level
gfx_level
,
const
aco_ptr
<
Instruction
>&
instr
,
unsigned
index
)
validate_subdword_operand
(
amd_gfx_level
gfx_level
,
const
aco_ptr
<
Instruction
>&
instr
,
unsigned
index
)
{
Operand
op
=
instr
->
operands
[
index
];
unsigned
byte
=
op
.
physReg
().
byte
();
...
...
src/amd/vulkan/radv_cmd_buffer.c
View file @
8c79f9ab
...
...
@@ -2609,11 +2609,12 @@ radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer)
enum
amd_gfx_level
gfx_level
=
cmd_buffer
->
device
->
physical_device
->
rad_info
.
gfx_level
;
uint8_t
watermark
=
gfx_level
>=
GFX10
?
6
:
4
;
radeon_set_context_reg
(
cmd_buffer
->
cs
,
R_028424_CB_DCC_CONTROL
,
S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE
(
gfx_level
<=
GFX9
)
|
S_028424_OVERWRITE_COMBINER_WATERMARK
(
watermark
)
|
S_028424_DISABLE_CONSTANT_ENCODE_AC01
(
disable_constant_encode_ac01
)
|
S_028424_DISABLE_CONSTANT_ENCODE_REG
(
disable_constant_encode
));
radeon_set_context_reg
(
cmd_buffer
->
cs
,
R_028424_CB_DCC_CONTROL
,
S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE
(
gfx_level
<=
GFX9
)
|
S_028424_OVERWRITE_COMBINER_WATERMARK
(
watermark
)
|
S_028424_DISABLE_CONSTANT_ENCODE_AC01
(
disable_constant_encode_ac01
)
|
S_028424_DISABLE_CONSTANT_ENCODE_REG
(
disable_constant_encode
));
}
cmd_buffer
->
state
.
dirty
&=
~
RADV_CMD_DIRTY_FRAMEBUFFER
;
...
...
@@ -3840,15 +3841,14 @@ radv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer, const struct radv_d
*/
if
(
info
->
gfx_level
==
GFX10_3
&&
state
->
active_pipeline_queries
>
0
&&
(
draw_info
->
instance_count
>
1
||
draw_info
->
indirect
)
&&
(
topology
==
V_008958_DI_PT_LINELIST_ADJ
||
topology
==
V_008958_DI_PT_LINESTRIP_ADJ
||
topology
==
V_008958_DI_PT_TRILIST_ADJ
||
topology
==
V_008958_DI_PT_TRISTRIP_ADJ
))
{
(
topology
==
V_008958_DI_PT_LINELIST_ADJ
||
topology
==
V_008958_DI_PT_LINESTRIP_ADJ
||
topology
==
V_008958_DI_PT_TRILIST_ADJ
||
topology
==
V_008958_DI_PT_TRISTRIP_ADJ
))
{
disable_instance_packing
=
true
;
}
if
((
draw_info
->
indexed
&&
state
->
index_type
!=
state
->
last_index_type
)
||
(
info
->
gfx_level
==
GFX10_3
&&
(
state
->
last_index_type
==
-
1
||
(
info
->
gfx_level
==
GFX10_3
&&
(
state
->
last_index_type
==
-
1
||
disable_instance_packing
!=
G_028A7C_DISABLE_INSTANCE_PACKING
(
state
->
last_index_type
))))
{
uint32_t
index_type
=
state
->
index_type
|
S_028A7C_DISABLE_INSTANCE_PACKING
(
disable_instance_packing
);
...
...
@@ -6270,7 +6270,8 @@ radv_emit_draw_packets_indexed(struct radv_cmd_buffer *cmd_buffer,
const
int
index_size
=
radv_get_vgt_index_size
(
state
->
index_type
);
unsigned
i
=
0
;
const
bool
uses_drawid
=
state
->
pipeline
->
graphics
.
uses_drawid
;
const
bool
can_eop
=
!
uses_drawid
&&
cmd_buffer
->
device
->
physical_device
->
rad_info
.
gfx_level
>=
GFX10
;
const
bool
can_eop
=
!
uses_drawid
&&
cmd_buffer
->
device
->
physical_device
->
rad_info
.
gfx_level
>=
GFX10
;
if
(
uses_drawid
)
{
if
(
vertexOffset
)
{
...
...
src/amd/vulkan/radv_debug.c
View file @
8c79f9ab
...
...
@@ -83,8 +83,7 @@ radv_init_trace(struct radv_device *device)
if
(
!
device
->
trace_id_ptr
)
return
false
;
ac_vm_fault_occured
(
device
->
physical_device
->
rad_info
.
gfx_level
,
&
device
->
dmesg_timestamp
,
NULL
);
ac_vm_fault_occured
(
device
->
physical_device
->
rad_info
.
gfx_level
,
&
device
->
dmesg_timestamp
,
NULL
);
return
true
;
}
...
...
src/amd/vulkan/radv_device.c
View file @
8c79f9ab
...
...
@@ -746,12 +746,10 @@ radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm
device
->
rad_info
.
family
!=
CHIP_NAVI14
&&
!
(
device
->
instance
->
debug_flags
&
RADV_DEBUG_NO_NGG
);
device
->
use_ngg_culling
=
device
->
use_ngg
&&
device
->
rad_info
.
max_render_backends
>
1
&&
(
device
->
rad_info
.
gfx_level
>=
GFX10_3
||
(
device
->
instance
->
perftest_flags
&
RADV_PERFTEST_NGGC
))
&&
!
(
device
->
instance
->
debug_flags
&
RADV_DEBUG_NO_NGGC
);
device
->
use_ngg_culling
=
device
->
use_ngg
&&
device
->
rad_info
.
max_render_backends
>
1
&&
(
device
->
rad_info
.
gfx_level
>=
GFX10_3
||
(
device
->
instance
->
perftest_flags
&
RADV_PERFTEST_NGGC
))
&&
!
(
device
->
instance
->
debug_flags
&
RADV_DEBUG_NO_NGGC
);
device
->
use_ngg_streamout
=
false
;
...
...
@@ -1625,8 +1623,8 @@ radv_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
VkPhysicalDeviceShaderAtomicFloat2FeaturesEXT
*
features
=
(
VkPhysicalDeviceShaderAtomicFloat2FeaturesEXT
*
)
ext
;
bool
has_shader_buffer_float_minmax
=
radv_has_shader_buffer_float_minmax
(
pdevice
);
bool
has_shader_image_float_minmax
=
pdevice
->
rad_info
.
gfx_level
!=
GFX8
&&
pdevice
->
rad_info
.
gfx_level
!=
GFX9
;
bool
has_shader_image_float_minmax
=
pdevice
->
rad_info
.
gfx_level
!=
GFX8
&&
pdevice
->
rad_info
.
gfx_level
!=
GFX9
;
features
->
shaderBufferFloat16Atomics
=
false
;
features
->
shaderBufferFloat16AtomicAdd
=
false
;
features
->
shaderBufferFloat16AtomicMinMax
=
false
;
...
...
@@ -5661,8 +5659,7 @@ radv_initialise_color_surface(struct radv_device *device, struct radv_color_buff
cb
->
cb_dcc_control
=
radv_init_dcc_control_reg
(
device
,
iview
);
/* This must be set for fast clear to work without FMASK. */
if
(
!
radv_image_has_fmask
(
iview
->
image
)
&&
device
->
physical_device
->
rad_info
.
gfx_level
==
GFX6
)
{
if
(
!
radv_image_has_fmask
(
iview
->
image
)
&&
device
->
physical_device
->
rad_info
.
gfx_level
==
GFX6
)
{
unsigned
bankh
=
util_logbase2
(
surf
->
u
.
legacy
.
bankh
);
cb
->
cb_color_attrib
|=
S_028C74_FMASK_BANK_HEIGHT
(
bankh
);
}
...
...
src/amd/vulkan/radv_meta_bufimage.c
View file @
8c79f9ab
...
...
@@ -1324,7 +1324,7 @@ get_image_stride_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
{
unsigned
stride
;
if
(
cmd_buffer
->
device
->
physical_device
->
rad_info
.
gfx_level
>=
GFX9
)
{
if
(
cmd_buffer
->
device
->
physical_device
->
rad_info
.
gfx_level
>=
GFX9
)
{
stride
=
surf
->
image
->
planes
[
0
].
surface
.
u
.
gfx9
.
surf_pitch
;
}
else
{
stride
=
surf
->
image
->
planes
[
0
].
surface
.
u
.
legacy
.
level
[
0
].
nblk_x
*
3
;
...
...
src/amd/vulkan/radv_nir_lower_abi.c
View file @
8c79f9ab
...
...
@@ -221,10 +221,8 @@ filter_abi_instr(const nir_instr *instr,
}
void
radv_nir_lower_abi
(
nir_shader
*
shader
,
enum
amd_gfx_level
gfx_level
,
const
struct
radv_shader_info
*
info
,
const
struct
radv_shader_args
*
args
,
radv_nir_lower_abi
(
nir_shader
*
shader
,
enum
amd_gfx_level
gfx_level
,
const
struct
radv_shader_info
*
info
,
const
struct
radv_shader_args
*
args
,
const
struct
radv_pipeline_key
*
pl_key
)
{
lower_abi_state
state
=
{
...
...
src/amd/vulkan/radv_nir_to_llvm.c
View file @
8c79f9ab
...
...
@@ -2025,8 +2025,8 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
float_mode
=
AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO
;
}
ac_llvm_context_init
(
&
ctx
.
ac
,
ac_llvm
,
options
->
gfx_level
,
options
->
family
,
options
->
info
,
float_mode
,
info
->
wave_size
,
info
->
ballot_bit_size
);
ac_llvm_context_init
(
&
ctx
.
ac
,
ac_llvm
,
options
->
gfx_level
,
options
->
family
,
options
->
info
,
float_mode
,
info
->
wave_size
,
info
->
ballot_bit_size
);
ctx
.
context
=
ctx
.
ac
.
context
;
ctx
.
max_workgroup_size
=
info
->
workgroup_size
;
...
...
@@ -2407,8 +2407,8 @@ radv_compile_gs_copy_shader(struct ac_llvm_compiler *ac_llvm,
assert
(
args
->
is_gs_copy_shader
);
ac_llvm_context_init
(
&
ctx
.
ac
,
ac_llvm
,
options
->
gfx_level
,
options
->
family
,
options
->
info
,
AC_FLOAT_MODE_DEFAULT
,
64
,
64
);
ac_llvm_context_init
(
&
ctx
.
ac
,
ac_llvm
,
options
->
gfx_level
,
options
->
family
,
options
->
info
,
AC_FLOAT_MODE_DEFAULT
,
64
,
64
);
ctx
.
context
=
ctx
.
ac
.
context
;
ctx
.
stage
=
MESA_SHADER_VERTEX
;
...
...
src/amd/vulkan/radv_pipeline.c
View file @
8c79f9ab
...
...
@@ -2180,10 +2180,9 @@ gfx9_get_gs_info(const struct radv_pipeline_key *key, const struct radv_pipeline
assert
(
max_prims_per_subgroup
<=
max_out_prims
);
gl_shader_stage
es_stage
=
has_tess
?
MESA_SHADER_TESS_EVAL
:
MESA_SHADER_VERTEX
;
unsigned
workgroup_size
=
ac_compute_esgs_workgroup_size
(
pipeline
->
device
->
physical_device
->
rad_info
.
gfx_level
,
stages
[
es_stage
].
info
.
wave_size
,
es_verts_per_subgroup
,
gs_inst_prims_in_subgroup
);
unsigned
workgroup_size
=
ac_compute_esgs_workgroup_size
(
pipeline
->
device
->
physical_device
->
rad_info
.
gfx_level
,
stages
[
es_stage
].
info
.
wave_size
,
es_verts_per_subgroup
,
gs_inst_prims_in_subgroup
);
stages
[
es_stage
].
info
.
workgroup_size
=
workgroup_size
;
stages
[
MESA_SHADER_GEOMETRY
].
info
.
workgroup_size
=
workgroup_size
;
}
...
...
@@ -2221,7 +2220,8 @@ radv_get_num_input_vertices(const struct radv_pipeline_stage *stages)
}
static
void
gfx10_emit_ge_pc_alloc
(
struct
radeon_cmdbuf
*
cs
,
enum
amd_gfx_level
gfx_level
,
uint32_t
oversub_pc_lines
)
gfx10_emit_ge_pc_alloc
(
struct
radeon_cmdbuf
*
cs
,
enum
amd_gfx_level
gfx_level
,
uint32_t
oversub_pc_lines
)
{
radeon_set_uconfig_reg
(
cs
,
R_030980_GE_PC_ALLOC
,
...
...
@@ -3481,7 +3481,8 @@ radv_declare_pipeline_args(struct radv_device *device, struct radv_pipeline_stag
gl_shader_stage
pre_stage
=
stages
[
MESA_SHADER_TESS_EVAL
].
nir
?
MESA_SHADER_TESS_EVAL
:
MESA_SHADER_VERTEX
;
radv_declare_shader_args
(
gfx_level
,
pipeline_key
,
&
stages
[
MESA_SHADER_GEOMETRY
].
info
,
MESA_SHADER_GEOMETRY
,
true
,
pre_stage
,
&
stages
[
MESA_SHADER_GEOMETRY
].
args
);
MESA_SHADER_GEOMETRY
,
true
,
pre_stage
,
&
stages
[
MESA_SHADER_GEOMETRY
].
args
);
stages
[
MESA_SHADER_GEOMETRY
].
info
.
user_sgprs_locs
=
stages
[
MESA_SHADER_GEOMETRY
].
args
.
user_sgprs_locs
;
stages
[
MESA_SHADER_GEOMETRY
].
info
.
inline_push_constant_mask
=
stages
[
MESA_SHADER_GEOMETRY
].
args
.
ac
.
inline_push_const_mask
;
...
...
@@ -3492,8 +3493,8 @@ radv_declare_pipeline_args(struct radv_device *device, struct radv_pipeline_stag
}
u_foreach_bit
(
i
,
active_stages
)
{
radv_declare_shader_args
(
gfx_level
,
pipeline_key
,
&
stages
[
i
].
info
,
i
,
false
,
MESA_SHADER_VERTEX
,
&
stages
[
i
].
args
);
radv_declare_shader_args
(
gfx_level
,
pipeline_key
,
&
stages
[
i
].
info
,
i
,
false
,
MESA_SHADER_VERTEX
,
&
stages
[
i
].
args
);
stages
[
i
].
info
.
user_sgprs_locs
=
stages
[
i
].
args
.
user_sgprs_locs
;
stages
[
i
].
info
.
inline_push_constant_mask
=
stages
[
i
].
args
.
ac
.
inline_push_const_mask
;
}
...
...
@@ -3568,8 +3569,9 @@ gather_tess_info(struct radv_device *device, struct radv_pipeline_stage *stages,
tess_in_patch_size
,
tess_out_patch_size
,
stages
[
MESA_SHADER_TESS_CTRL
].
info
.
tcs
.
num_linked_inputs
,
stages
[
MESA_SHADER_TESS_CTRL
].
info
.
tcs
.
num_linked_outputs
,
stages
[
MESA_SHADER_TESS_CTRL
].
info
.
tcs
.
num_linked_patch_outputs
,
device
->
hs
.
tess_offchip_block_dw_size
,
device
->
physical_device
->
rad_info
.
gfx_level
,
device
->
physical_device
->
rad_info
.
family
);
stages
[
MESA_SHADER_TESS_CTRL
].
info
.
tcs
.
num_linked_patch_outputs
,
device
->
hs
.
tess_offchip_block_dw_size
,
device
->
physical_device
->
rad_info
.
gfx_level
,
device
->
physical_device
->
rad_info
.
family
);
/* LDS size used by VS+TCS for storing TCS inputs and outputs. */
unsigned
tcs_lds_size
=
calculate_tess_lds_size
(
...
...
@@ -3625,9 +3627,8 @@ gather_tess_info(struct radv_device *device, struct radv_pipeline_stage *stages,
for
(
gl_shader_stage
s
=
MESA_SHADER_VERTEX
;
s
<=
MESA_SHADER_TESS_CTRL
;
++
s
)
stages
[
s
].
info
.
workgroup_size
=
ac_compute_lshs_workgroup_size
(
device
->
physical_device
->
rad_info
.
gfx_level
,
s
,
num_patches
,
tess_in_patch_size
,
tess_out_patch_size
);
ac_compute_lshs_workgroup_size
(
device
->
physical_device
->
rad_info
.
gfx_level
,
s
,
num_patches
,
tess_in_patch_size
,
tess_out_patch_size
);
}
static
bool
...
...
@@ -4543,7 +4544,7 @@ radv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout
NIR_PASS_V
(
stages
[
i
].
nir
,
nir_opt_vectorize
,
opt_vectorize_callback
,
NULL
);
}
}
/* cleanup passes */
nir_lower_load_const_to_scalar
(
stages
[
i
].
nir
);
...
...
@@ -4761,17 +4762,17 @@ radv_pipeline_stage_to_user_data_0(struct radv_pipeline *pipeline, gl_shader_sta
return
R_00B130_SPI_SHADER_USER_DATA_VS_0
;
case
MESA_SHADER_GEOMETRY
:
return
gfx_level
==
GFX9
?
R_00B330_SPI_SHADER_USER_DATA_ES_0
:
R_00B230_SPI_SHADER_USER_DATA_GS_0
;
:
R_00B230_SPI_SHADER_USER_DATA_GS_0
;
case
MESA_SHADER_COMPUTE
:
case
MESA_SHADER_TASK
:
return
R_00B900_COMPUTE_USER_DATA_0
;
case
MESA_SHADER_TESS_CTRL
:
return
gfx_level
==
GFX9
?
R_00B430_SPI_SHADER_USER_DATA_LS_0
:
R_00B430_SPI_SHADER_USER_DATA_HS_0
;
:
R_00B430_SPI_SHADER_USER_DATA_HS_0
;
case
MESA_SHADER_TESS_EVAL
:
if
(
has_gs
)
{
return
gfx_level
>=
GFX10
?
R_00B230_SPI_SHADER_USER_DATA_GS_0
:
R_00B330_SPI_SHADER_USER_DATA_ES_0
;
:
R_00B330_SPI_SHADER_USER_DATA_ES_0
;
}
else
if
(
has_ngg
)
{
return
R_00B230_SPI_SHADER_USER_DATA_GS_0
;
}
else
{
...
...
@@ -5453,7 +5454,8 @@ radv_pipeline_generate_hw_vs(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf
}
if
(
pipeline
->
device
->
physical_device
->
rad_info
.
gfx_level
>=
GFX10
)
{
uint32_t
oversub_pc_lines
=
late_alloc_wave64
?
pipeline
->
device
->
physical_device
->
rad_info
.
pc_lines
/
4
:
0
;
gfx10_emit_ge_pc_alloc
(
cs
,
pipeline
->
device
->
physical_device
->
rad_info
.
gfx_level
,
oversub_pc_lines
);
gfx10_emit_ge_pc_alloc
(
cs
,
pipeline
->
device
->
physical_device
->
rad_info
.
gfx_level
,
oversub_pc_lines
);
}
}
...
...
@@ -5645,7 +5647,8 @@ radv_pipeline_generate_hw_ngg(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf
oversub_pc_lines
*=
oversub_factor
;
}
gfx10_emit_ge_pc_alloc
(
cs
,
pipeline
->
device
->
physical_device
->
rad_info
.
gfx_level
,
oversub_pc_lines
);
gfx10_emit_ge_pc_alloc
(
cs
,
pipeline
->
device
->
physical_device
->
rad_info
.
gfx_level
,
oversub_pc_lines
);
}
static
void
...
...
@@ -6678,8 +6681,7 @@ radv_graphics_pipeline_init(struct radv_pipeline *pipeline, struct radv_device *
* instructions if any are present.
*/
struct
radv_shader
*
ps
=
pipeline
->
shaders
[
MESA_SHADER_FRAGMENT
];
if
((
pipeline
->
device
->
physical_device
->
rad_info
.
gfx_level
<=
GFX9
||
ps
->
info
.
ps
.
can_discard
)
&&
if
((
pipeline
->
device
->
physical_device
->
rad_info
.
gfx_level
<=
GFX9
||
ps
->
info
.
ps
.
can_discard
)
&&
!
blend
.
spi_shader_col_format
)
{
if
(
!
ps
->
info
.
ps
.
writes_z
&&
!
ps
->
info
.
ps
.
writes_stencil
&&
!
ps
->
info
.
ps
.
writes_sample_mask
)
blend
.
spi_shader_col_format
=
V_028714_SPI_SHADER_32_R
;
...
...
src/amd/vulkan/radv_private.h
View file @
8c79f9ab
...
...
@@ -2390,10 +2390,9 @@ radv_image_get_iterate256(struct radv_device *device, struct radv_image *image)
{
/* ITERATE_256 is required for depth or stencil MSAA images that are TC-compatible HTILE. */
return
device
->
physical_device
->
rad_info
.
gfx_level
>=
GFX10
&&
(
image
->
usage
&
(
VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT
|
VK_IMAGE_USAGE_TRANSFER_DST_BIT
))
&&
radv_image_is_tc_compat_htile
(
image
)
&&
image
->
info
.
samples
>
1
;
(
image
->
usage
&
(
VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT
|
VK_IMAGE_USAGE_TRANSFER_DST_BIT
))
&&
radv_image_is_tc_compat_htile
(
image
)
&&
image
->
info
.
samples
>
1
;
}
unsigned
radv_image_queue_family_mask
(
const
struct
radv_image
*
image
,
...
...
src/amd/vulkan/radv_shader.c
View file @
8c79f9ab
...
...
@@ -1775,7 +1775,7 @@ radv_open_rtld_binary(struct radv_device *device, const struct radv_shader *shad
if
(
device
->
physical_device
->
rad_info
.
gfx_level
>=
GFX9
&&
(
binary
->
stage
==
MESA_SHADER_GEOMETRY
||
binary
->
info
.
is_ngg
)
&&
!
binary
->
is_gs_copy_shader
)
{
!
binary
->
is_gs_copy_shader
)
{
struct
ac_rtld_symbol
*
sym
=
&
lds_symbols
[
num_lds_symbols
++
];
sym
->
name
=
"esgs_ring"
;
sym
->
size
=
binary
->
info
.
ngg_info
.
esgs_ring_size
;
...
...
src/amd/vulkan/radv_shader.h
View file @
8c79f9ab
...
...
@@ -526,8 +526,7 @@ nir_shader *radv_shader_compile_to_nir(struct radv_device *device,
const
struct
radv_pipeline_key
*
key
);
void
radv_nir_lower_abi
(
nir_shader
*
shader
,
enum
amd_gfx_level
gfx_level
,
const
struct
radv_shader_info
*
info
,
const
struct
radv_shader_args
*
args
,
const
struct
radv_shader_info
*
info
,
const
struct
radv_shader_args
*
args
,
const
struct
radv_pipeline_key
*
pl_key
);
void
radv_init_shader_arenas
(
struct
radv_device
*
device
);
...
...
src/amd/vulkan/si_cmd_buffer.c
View file @
8c79f9ab
...
...
@@ -981,9 +981,10 @@ si_emit_acquire_mem(struct radeon_cmdbuf *cs, bool is_mec, bool is_gfx9, unsigne
}
static
void
gfx10_cs_emit_cache_flush
(
struct
radeon_cmdbuf
*
cs
,
enum
amd_gfx_level
gfx_level
,
uint32_t
*
flush_cnt
,
uint64_t
flush_va
,
bool
is_mec
,
enum
radv_cmd_flush_bits
flush_bits
,
enum
rgp_flush_bits
*
sqtt_flush_bits
,
uint64_t
gfx9_eop_bug_va
)
gfx10_cs_emit_cache_flush
(
struct
radeon_cmdbuf
*
cs
,
enum
amd_gfx_level
gfx_level
,
uint32_t
*
flush_cnt
,
uint64_t
flush_va
,
bool
is_mec
,
enum
radv_cmd_flush_bits
flush_bits
,
enum
rgp_flush_bits
*
sqtt_flush_bits
,
uint64_t
gfx9_eop_bug_va
)
{
uint32_t
gcr_cntl
=
0
;
unsigned
cb_db_event
=
0
;
...
...
@@ -1486,8 +1487,8 @@ si_emit_cp_dma(struct radv_cmd_buffer *cmd_buffer, uint64_t dst_va, uint64_t src
command
|=
S_415_RAW_WAIT
(
1
);
/* Src and dst flags. */
if
(
cmd_buffer
->
device
->
physical_device
->
rad_info
.
gfx_level
>=
GFX9
&&
!
(
flags
&
CP_DMA_CLEAR
)
&&
src_va
==
dst_va
)
if
(
cmd_buffer
->
device
->
physical_device
->
rad_info
.
gfx_level
>=
GFX9
&&
!
(
flags
&
CP_DMA_CLEAR
)
&&
src_va
==
dst_va
)
header
|=
S_411_DST_SEL
(
V_411_NOWHERE
);
/* prefetch only */
else
if
(
flags
&
CP_DMA_USE_L2
)
header
|=
S_411_DST_SEL
(
V_411_DST_ADDR_TC_L2
);
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment