Commit 2854b63e authored by Alyssa Rosenzweig's avatar Alyssa Rosenzweig 💜

Correctly implement sin and cos, extending NIR

parent bb576768
......@@ -1888,6 +1888,7 @@ typedef struct nir_shader_compiler_options {
bool lower_extract_word;
bool lower_all_io_to_temps;
/** lowers fsin and fcos to fsinpi and fcospi, dividing the argument by pi */
bool lower_fsinpi;
/**
* Does the driver support real 32-bit integers? (Otherwise, integers
......
......@@ -214,6 +214,8 @@ unop("fquantize2f16", tfloat, "(fabs(src0) < ldexpf(1.0, -14)) ? copysignf(0.0f,
unop("fsin", tfloat, "bit_size == 64 ? sin(src0) : sinf(src0)")
unop("fcos", tfloat, "bit_size == 64 ? cos(src0) : cosf(src0)")
# fsinpi/fcospi take their argument in units of pi:
#    fsinpi(x) = sin(pi * x),  fcospi(x) = cos(pi * x)
# The algebraic lowering rewrites fsin(a) as fsinpi(a / pi), so constant
# folding must multiply by pi here for the round trip to yield sin(a).
unop("fsinpi", tfloat, "bit_size == 64 ? sin(src0 * 3.14159265358979323846) : sinf(src0 * 3.14159265358979323846f)")
unop("fcospi", tfloat, "bit_size == 64 ? cos(src0 * 3.14159265358979323846) : cosf(src0 * 3.14159265358979323846f)")
# Partial derivatives.
......
......@@ -562,6 +562,9 @@ optimizations = [
('extract_i8', 'v', 3))),
127.0))),
'options->lower_unpack_snorm_4x8'),
# Lower fsin/fcos to the pi-scaled variants for hardware whose sin/cos
# expect the argument pre-divided by pi. Pi is spelled out to full double
# precision: a truncated 3.14159 is off by more than ten float32 ulps and
# that error would leak into every lowered sin/cos.
(('fsin', a), ('fsinpi', ('fdiv', a, 3.14159265358979323846)), 'options->lower_fsinpi'),
(('fcos', a), ('fcospi', ('fdiv', a, 3.14159265358979323846)), 'options->lower_fsinpi'),
]
invert = {'feq': 'fne', 'fne': 'feq', 'fge': 'flt', 'flt': 'fge' }
......
......@@ -359,11 +359,15 @@ emit_alu(compiler_context *ctx, nir_alu_instr *instr)
EMIT_ALU_CASE_1(fexp2, fexp2);
EMIT_ALU_CASE_1(flog2, flog2);
// TODO: Input needs to be divided by pi, but doing that
// efficiently might require a custom NIR instruction +
// lowering pass?
//EMIT_ALU_CASE_1(fsin, fsin);
//EMIT_ALU_CASE_1(fcos, fcos);
// Input needs to be divided by pi due to Midgard weirdness. We
// define special NIR ops, fsinpi and fcospi, that include the
// division correctly, supplying appropriate lowering passes.
// That way, the division by pi can take advantage of constant
// folding, algebraic simplifications, and so forth.
EMIT_ALU_CASE_1(fsinpi, fsin);
EMIT_ALU_CASE_1(fcospi, fcos);
//EMIT_ALU_CASE_2(fatan_pt1);
default:
......@@ -599,6 +603,7 @@ static const nir_shader_compiler_options nir_options = {
.lower_fmod32 = true,
.lower_fmod64 = true,
.lower_fdiv = true,
.lower_fsinpi = true,
.fuse_ffma = true,
.native_integers = true,
.vertex_id_zero_based = true,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment