Commit 26e69b73 authored by Alyssa Rosenzweig 💜
Browse files

pan/bi: Handle fsqrt like the DDK



There are 4 distinct cases of fsqrt:

1. FP16 on Bifrost

Here we may lower to `x * rsqrt(x)` with a .left modifier on the
FMA_RSCALE.v2f16 used to carry out the multiplication, ensuring correct
handling of NaN and Inf.

2. FP32 on G71

Missing FRSQ.f32 instruction, do something simple since we don't even
probe the driver on G71...

3. FP32 on G72 and newer

We can do the same lowering as FP16 in theory. However, this may have
precision issues. The DDK uses extra FREXPM/FREXPE instructions in a
.sqrt mode for a range reduction. It's unknown if this is necessary for
OpenGL (ES), Vulkan, OpenCL, or some combination thereof.

4. FP16 on Valhall

We want to use the same strategy as on Bifrost, but Valhall removed the
FMA_RSCALE.v2f16 instruction. Instead we use an ordinary FMA.v2f16 for
the multiply and check the special case explicitly with CSEL.v2f16 ...
I'm not sure if this is right for infinity but the DDK does it so ¯\_(ツ)_/¯
Signed-off-by: Alyssa Rosenzweig <alyssa@collabora.com>
parent 8b960a5d
Pipeline #383388 waiting for manual action with stages
......@@ -1485,6 +1485,40 @@ bi_lower_frsq_32(bi_builder *b, bi_index dst, bi_index s0)
BI_ROUND_NONE, BI_SPECIAL_N);
}
/* 32-bit fsqrt variant that obtains the reciprocal square root through
 * bi_lower_frsq_32 rather than emitting bi_frsq_f32 directly.
 *
 * The source is split with FREXPM/FREXPE, the reciprocal square root of the
 * mantissa part is computed into a fresh temporary, and the final
 * FMA_RSCALE.f32 multiplies the mantissa by it, adds negative zero and
 * applies the exponent as the scale operand, writing the result to dst.
 *
 * NOTE(review): the (false, true) flags passed to the FREXP builders
 * presumably select the .sqrt mode -- confirm against the builder signature.
 */
static void
bi_lower_fsqrt_32(bi_builder *b, bi_index dst, bi_index s0)
{
        bi_index mantissa = bi_frexpm_f32(b, s0, false, true);
        bi_index exponent = bi_frexpe_f32(b, s0, false, true);

        /* Holds rsqrt(mantissa), filled in by the lowered FRSQ sequence */
        bi_index rsq = bi_temp(b->shader);
        bi_lower_frsq_32(b, rsq, mantissa);

        /* mantissa * rsqrt(mantissa), rescaled by the exponent */
        bi_fma_rscale_f32_to(b, dst, mantissa, rsq, bi_negzero(), exponent,
                             BI_ROUND_NONE, BI_SPECIAL_LEFT);
}
/* 32-bit fsqrt using the native bi_frsq_f32 instruction.
 *
 * The source is split with FREXPM/FREXPE, FRSQ.f32 is applied to the
 * mantissa part, and FMA_RSCALE.f32 multiplies the mantissa by that
 * reciprocal square root, adds negative zero and applies the exponent as
 * the scale operand, writing the result to dst.
 *
 * NOTE(review): the (false, true) flags passed to the FREXP builders
 * presumably select the .sqrt mode -- confirm against the builder signature.
 */
static void
bi_fsqrt_32(bi_builder *b, bi_index dst, bi_index s0)
{
        bi_index mantissa = bi_frexpm_f32(b, s0, false, true);
        bi_index exponent = bi_frexpe_f32(b, s0, false, true);
        bi_index rsq = bi_frsq_f32(b, mantissa);

        /* mantissa * rsqrt(mantissa), rescaled by the exponent */
        bi_fma_rscale_f32_to(b, dst, mantissa, rsq, bi_negzero(), exponent,
                             BI_ROUND_NONE, BI_SPECIAL_LEFT);
}
/* 16-bit fsqrt, built as s0 * rsqrt(s0) and written to dst.
 *
 * FRSQ.f16 is emitted first in both paths; the multiply that follows
 * depends on the architecture version stored in b->shader->arch.
 */
static void
bi_fsqrt_16(bi_builder *b, bi_index dst, bi_index s0)
{
        bi_index inv_root = bi_frsq_f16(b, s0);

        if (b->shader->arch <= 8) {
                /* Bifrost has a 16-bit FMA_RSCALE: do the multiply with a
                 * zero scale operand and the .left special mode */
                bi_fma_rscale_v2f16_to(b, dst, s0, inv_root, bi_negzero(),
                                       bi_zero(), BI_ROUND_NONE,
                                       BI_SPECIAL_LEFT);
                return;
        }

        /* Valhall does not, so we need to lower: a plain FMA.v2f16 does the
         * multiply, then a CSEL comparing s0 against zero with
         * BI_CMPF_EQ picks between s0 itself and the product.
         * NOTE(review): presumably s0 is selected when the comparison
         * holds, so a zero input passes through -- confirm CSEL operand
         * order against the builder. */
        bi_index product = bi_fma_v2f16(b, s0, inv_root, bi_negzero(),
                                        BI_ROUND_NONE);
        bi_csel_v2f16_to(b, dst, s0, bi_zero(), s0, product, BI_CMPF_EQ);
}
/* More complex transcendentals, see
* https://gitlab.freedesktop.org/panfrost/mali-isa-docs/-/blob/master/Bifrost.adoc
* for documentation */
......@@ -2235,6 +2269,15 @@ bi_emit_alu(bi_builder *b, nir_alu_instr *instr)
bi_frcp_to(b, sz, dst, s0);
break;
case nir_op_fsqrt:
if (sz == 32 && b->shader->quirks & BIFROST_NO_FP32_TRANSCENDENTALS)
bi_lower_fsqrt_32(b, dst, s0);
else if (sz == 32)
bi_fsqrt_32(b, dst, s0);
else
bi_fsqrt_16(b, dst, s0);
break;
case nir_op_uclz:
bi_clz_to(b, sz, dst, s0, false);
break;
......@@ -3391,6 +3434,7 @@ bi_vectorize_filter(const nir_instr *instr, void *data)
switch (alu->op) {
case nir_op_frcp:
case nir_op_frsq:
case nir_op_fsqrt:
case nir_op_ishl:
case nir_op_ishr:
case nir_op_ushr:
......
......@@ -46,7 +46,6 @@ static const nir_shader_compiler_options bifrost_nir_options = {
.lower_find_lsb = true,
.lower_ifind_msb = true,
.lower_fdph = true,
.lower_fsqrt = true,
.lower_wpos_pntc = true,
.lower_fsign = true,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment