Commit bf686b27 authored by Matt Turner

i965/vec4: Optimize unpackUnorm4x8().

Reduces the number of instructions needed to implement unpackUnorm4x8()
from 11 -> 4.
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
parent cb0ba848
......@@ -85,8 +85,11 @@ brw_lower_packing_builtins(struct brw_context *brw,
| LOWER_UNPACK_UNORM_2x16
| LOWER_PACK_SNORM_4x8
| LOWER_UNPACK_SNORM_4x8
| LOWER_PACK_UNORM_4x8
| LOWER_UNPACK_UNORM_4x8;
| LOWER_PACK_UNORM_4x8;
if (shader_type == MESA_SHADER_FRAGMENT) {
ops |= LOWER_UNPACK_UNORM_4x8;
}
if (brw->gen >= 7) {
/* Gen7 introduced the f32to16 and f16to32 instructions, which can be
......
......@@ -508,6 +508,7 @@ public:
void emit_pack_half_2x16(dst_reg dst, src_reg src0);
void emit_unpack_half_2x16(dst_reg dst, src_reg src0);
void emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0);
uint32_t gather_channel(ir_texture *ir, uint32_t sampler);
src_reg emit_mcs_fetch(ir_texture *ir, src_reg coordinate, src_reg sampler);
......
......@@ -466,6 +466,28 @@ vec4_visitor::emit_unpack_half_2x16(dst_reg dst, src_reg src0)
emit(F16TO32(dst, tmp_src));
}
void
vec4_visitor::emit_unpack_unorm_4x8(const dst_reg &dst, src_reg src0)
{
/* Instead of splitting the 32-bit integer, shifting, and ORing it back
* together, we can shift it by <0, 8, 16, 24>. The packed integer immediate
* is not suitable to generate the shift values, but we can use the packed
* vector float and a type-converting MOV.
*/
dst_reg shift(this, glsl_type::uvec4_type);
emit(MOV(shift, src_reg(0x00, 0x60, 0x70, 0x78)));
dst_reg shifted(this, glsl_type::uvec4_type);
src0.swizzle = BRW_SWIZZLE_XXXX;
emit(SHR(shifted, src0, src_reg(shift)));
shifted.type = BRW_REGISTER_TYPE_UB;
dst_reg f(this, glsl_type::vec4_type);
emit(MOV(f, src_reg(shifted)));
emit(MUL(dst, src_reg(f), src_reg(1.0f / 255.0f)));
}
void
vec4_visitor::visit_instructions(const exec_list *list)
{
......@@ -1747,6 +1769,9 @@ vec4_visitor::visit(ir_expression *ir)
case ir_unop_unpack_half_2x16:
emit_unpack_half_2x16(result_dst, op[0]);
break;
case ir_unop_unpack_unorm_4x8:
emit_unpack_unorm_4x8(result_dst, op[0]);
break;
case ir_unop_pack_snorm_2x16:
case ir_unop_pack_snorm_4x8:
case ir_unop_pack_unorm_2x16:
......@@ -1754,7 +1779,6 @@ vec4_visitor::visit(ir_expression *ir)
case ir_unop_unpack_snorm_2x16:
case ir_unop_unpack_snorm_4x8:
case ir_unop_unpack_unorm_2x16:
case ir_unop_unpack_unorm_4x8:
unreachable("not reached: should be handled by lower_packing_builtins");
case ir_unop_unpack_half_2x16_split_x:
case ir_unop_unpack_half_2x16_split_y:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment