Commit c129ede5 authored by Rhys Perry, committed by Marge Bot
Browse files

aco: use ds_read_{u8,u16}_d16



This allows partial writes and writes to the upper half of the destination.

fossil-db (Sienna Cichlid):
Totals from 135 (0.09% of 149839) affected shaders:
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <mesa/mesa!11113>
parent 6334d73f
Pipeline #335362 waiting for manual action with stages
......@@ -3668,7 +3668,6 @@ Temp lds_load_callback(Builder& bld, const LoadEmitInfo &info,
bool read2 = false;
unsigned size = 0;
aco_opcode op;
//TODO: use ds_read_u8_d16_hi/ds_read_u16_d16_hi if beneficial
if (bytes_needed >= 16 && align % 16 == 0 && large_ds_read) {
size = 16;
op = aco_opcode::ds_read_b128;
......@@ -3691,10 +3690,10 @@ Temp lds_load_callback(Builder& bld, const LoadEmitInfo &info,
op = aco_opcode::ds_read_b32;
} else if (bytes_needed >= 2 && align % 2 == 0) {
size = 2;
op = aco_opcode::ds_read_u16;
op = bld.program->chip_class >= GFX9 ? aco_opcode::ds_read_u16_d16 : aco_opcode::ds_read_u16;
} else {
size = 1;
op = aco_opcode::ds_read_u8;
op = bld.program->chip_class >= GFX9 ? aco_opcode::ds_read_u8_d16 : aco_opcode::ds_read_u8;
}
unsigned const_offset_unit = read2 ? size / 2u : 1u;
......
......@@ -1204,14 +1204,14 @@ DS = {
(0x51, 0x51, 0x51, 0x51, 0x51, "ds_cmpst_f64"),
(0x52, 0x52, 0x52, 0x52, 0x52, "ds_min_f64"),
(0x53, 0x53, 0x53, 0x53, 0x53, "ds_max_f64"),
( -1, -1, 0x54, 0x54, 0xa0, "ds_write_b8_d16_hi"),
( -1, -1, 0x55, 0x55, 0xa1, "ds_write_b16_d16_hi"),
( -1, -1, 0x56, 0x56, 0xa2, "ds_read_u8_d16"),
( -1, -1, 0x57, 0x57, 0xa3, "ds_read_u8_d16_hi"),
( -1, -1, 0x58, 0x58, 0xa4, "ds_read_i8_d16"),
( -1, -1, 0x59, 0x59, 0xa5, "ds_read_i8_d16_hi"),
( -1, -1, 0x5a, 0x5a, 0xa6, "ds_read_u16_d16"),
( -1, -1, 0x5b, 0x5b, 0xa7, "ds_read_u16_d16_hi"),
( -1, -1, -1, 0x54, 0xa0, "ds_write_b8_d16_hi"),
( -1, -1, -1, 0x55, 0xa1, "ds_write_b16_d16_hi"),
( -1, -1, -1, 0x56, 0xa2, "ds_read_u8_d16"),
( -1, -1, -1, 0x57, 0xa3, "ds_read_u8_d16_hi"),
( -1, -1, -1, 0x58, 0xa4, "ds_read_i8_d16"),
( -1, -1, -1, 0x59, 0xa5, "ds_read_i8_d16_hi"),
( -1, -1, -1, 0x5a, 0xa6, "ds_read_u16_d16"),
( -1, -1, -1, 0x5b, 0xa7, "ds_read_u16_d16_hi"),
(0x60, 0x60, 0x60, 0x60, 0x60, "ds_add_rtn_u64"),
(0x61, 0x61, 0x61, 0x61, 0x61, "ds_sub_rtn_u64"),
(0x62, 0x62, 0x62, 0x62, 0x62, "ds_rsub_rtn_u64"),
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment