Commit 48a75e7a authored by Daniel Schürmann, committed by Daniel Schürmann

amd/common: lower bitfield_insert to bfm & bitfield_select

Reviewed-by: Connor Abbott <cwabbott0@gmail.com>
parent a8b0b6e5
......@@ -455,34 +455,30 @@ static LLVMValueRef emit_bitfield_extract(struct ac_llvm_context *ctx,
return result;
}
static LLVMValueRef emit_bitfield_insert(struct ac_llvm_context *ctx,
LLVMValueRef src0, LLVMValueRef src1,
LLVMValueRef src2, LLVMValueRef src3)
static LLVMValueRef emit_bfm(struct ac_llvm_context *ctx,
LLVMValueRef bits, LLVMValueRef offset)
{
LLVMValueRef bfi_args[3], result;
bfi_args[0] = LLVMBuildShl(ctx->builder,
LLVMBuildSub(ctx->builder,
LLVMBuildShl(ctx->builder,
ctx->i32_1,
src3, ""),
ctx->i32_1, ""),
src2, "");
bfi_args[1] = LLVMBuildShl(ctx->builder, src1, src2, "");
bfi_args[2] = src0;
LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, src3, LLVMConstInt(ctx->i32, 32, false), "");
/* mask = ((1 << bits) - 1) << offset */
return LLVMBuildShl(ctx->builder,
LLVMBuildSub(ctx->builder,
LLVMBuildShl(ctx->builder,
ctx->i32_1,
bits, ""),
ctx->i32_1, ""),
offset, "");
}
static LLVMValueRef emit_bitfield_select(struct ac_llvm_context *ctx,
LLVMValueRef mask, LLVMValueRef insert,
LLVMValueRef base)
{
/* Calculate:
* (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2)
* (mask & insert) | (~mask & base) = base ^ (mask & (insert ^ base))
* Use the right-hand side, which the LLVM backend can convert to V_BFI.
*/
result = LLVMBuildXor(ctx->builder, bfi_args[2],
LLVMBuildAnd(ctx->builder, bfi_args[0],
LLVMBuildXor(ctx->builder, bfi_args[1], bfi_args[2], ""), ""), "");
result = LLVMBuildSelect(ctx->builder, icond, src1, result, "");
return result;
return LLVMBuildXor(ctx->builder, base,
LLVMBuildAnd(ctx->builder, mask,
LLVMBuildXor(ctx->builder, insert, base, ""), ""), "");
}
static LLVMValueRef emit_pack_half_2x16(struct ac_llvm_context *ctx,
......@@ -835,15 +831,18 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
else
result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f64", ctx->ac.f64, src, 2, AC_FUNC_ATTR_READNONE);
break;
case nir_op_bfm:
result = emit_bfm(&ctx->ac, src[0], src[1]);
break;
case nir_op_bitfield_select:
result = emit_bitfield_select(&ctx->ac, src[0], src[1], src[2]);
break;
case nir_op_ibitfield_extract:
result = emit_bitfield_extract(&ctx->ac, true, src);
break;
case nir_op_ubitfield_extract:
result = emit_bitfield_extract(&ctx->ac, false, src);
break;
case nir_op_bitfield_insert:
result = emit_bitfield_insert(&ctx->ac, src[0], src[1], src[2], src[3]);
break;
case nir_op_bitfield_reverse:
result = ac_build_bitfield_reverse(&ctx->ac, src[0]);
break;
......
......@@ -58,6 +58,7 @@ static const struct nir_shader_compiler_options nir_options = {
.lower_device_index_to_zero = true,
.lower_fsat = true,
.lower_fdiv = true,
.lower_bitfield_insert_to_bitfield_select = true,
.lower_sub = true,
.lower_pack_snorm_2x16 = true,
.lower_pack_snorm_4x8 = true,
......
......@@ -487,6 +487,7 @@ static const struct nir_shader_compiler_options nir_options = {
.lower_flrp64 = true,
.lower_fsat = true,
.lower_fdiv = true,
.lower_bitfield_insert_to_bitfield_select = true,
.lower_sub = true,
.lower_ffma = true,
.lower_fmod = true,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment