Commit 10d5d2d5 authored by Emma Anholt
Browse files

vc4: Fix sin(0.0) and cos(0.0) accuracy to fix SDL rendering rotation.

SDL has some shaders that compute sin(angle) and cos(angle) for a rotation
matrix in the VS, and angle is usually 0.0.  Our previous implementation
had quite a bit of error around 0.0, causing single-pixel rotations at
typical window sizes.  SDL2 has changed as of August 28th (commit
12156:e5a666405750) to not need sin/cos in the VS, but we should still fix
this for existing implementations or similar patterns that other programs
may have.

glsl-cos goes from 32 instructions to 36, but 9 uniforms to 7.
glsl-sin goes from 32 instructions to 34, but 8 uniforms to 7.

This seems like a fine impact to have for the bugfix.

Cc: 18.1 18.2 <mesa-stable@lists.freedesktop.org>
Fixes: https://github.com/anholt/mesa/issues/110
parent a0baedb6
......@@ -686,25 +686,45 @@ ntq_fceil(struct vc4_compile *c, struct qreg src)
return qir_MOV(c, result);
}
static struct qreg
ntq_shrink_sincos_input_range(struct vc4_compile *c, struct qreg x)
{
/* Since we're using a Taylor approximation, we want to have a small
* number of coefficients and take advantage of sin/cos repeating
* every 2pi. We keep our x as close to 0 as we can, since the series
* will be less accurate as |x| increases. (Also, be careful of
* shifting the input x value to be tricky with sin/cos relations,
* because getting accurate values for x==0 is very important for SDL
* rendering)
*/
struct qreg scaled_x =
qir_FMUL(c, x,
qir_uniform_f(c, 1.0f / (M_PI * 2.0f)));
/* Note: FTOI truncates toward 0. */
struct qreg x_frac = qir_FSUB(c, scaled_x,
qir_ITOF(c, qir_FTOI(c, scaled_x)));
/* Map [0.5, 1] to [-0.5, 0] */
qir_SF(c, qir_FSUB(c, x_frac, qir_uniform_f(c, 0.5)));
qir_FSUB_dest(c, x_frac, x_frac, qir_uniform_f(c, 1.0))->cond = QPU_COND_NC;
/* Map [-1, -0.5] to [0, 0.5] */
qir_SF(c, qir_FADD(c, x_frac, qir_uniform_f(c, 0.5)));
qir_FADD_dest(c, x_frac, x_frac, qir_uniform_f(c, 1.0))->cond = QPU_COND_NS;
return x_frac;
}
static struct qreg
ntq_fsin(struct vc4_compile *c, struct qreg src)
{
float coeff[] = {
-2.0 * M_PI,
pow(2.0 * M_PI, 3) / (3 * 2 * 1),
-pow(2.0 * M_PI, 5) / (5 * 4 * 3 * 2 * 1),
pow(2.0 * M_PI, 7) / (7 * 6 * 5 * 4 * 3 * 2 * 1),
-pow(2.0 * M_PI, 9) / (9 * 8 * 7 * 6 * 5 * 4 * 3 * 2 * 1),
2.0 * M_PI,
-pow(2.0 * M_PI, 3) / (3 * 2 * 1),
pow(2.0 * M_PI, 5) / (5 * 4 * 3 * 2 * 1),
-pow(2.0 * M_PI, 7) / (7 * 6 * 5 * 4 * 3 * 2 * 1),
pow(2.0 * M_PI, 9) / (9 * 8 * 7 * 6 * 5 * 4 * 3 * 2 * 1),
};
struct qreg scaled_x =
qir_FMUL(c,
src,
qir_uniform_f(c, 1.0 / (M_PI * 2.0)));
struct qreg x = qir_FADD(c,
ntq_ffract(c, scaled_x),
qir_uniform_f(c, -0.5));
struct qreg x = ntq_shrink_sincos_input_range(c, src);
struct qreg x2 = qir_FMUL(c, x, x);
struct qreg sum = qir_FMUL(c, x, qir_uniform_f(c, coeff[0]));
for (int i = 1; i < ARRAY_SIZE(coeff); i++) {
......@@ -722,21 +742,15 @@ static struct qreg
ntq_fcos(struct vc4_compile *c, struct qreg src)
{
float coeff[] = {
-1.0f,
pow(2.0 * M_PI, 2) / (2 * 1),
-pow(2.0 * M_PI, 4) / (4 * 3 * 2 * 1),
pow(2.0 * M_PI, 6) / (6 * 5 * 4 * 3 * 2 * 1),
-pow(2.0 * M_PI, 8) / (8 * 7 * 6 * 5 * 4 * 3 * 2 * 1),
pow(2.0 * M_PI, 10) / (10 * 9 * 8 * 7 * 6 * 5 * 4 * 3 * 2 * 1),
1.0f,
-pow(2.0 * M_PI, 2) / (2 * 1),
pow(2.0 * M_PI, 4) / (4 * 3 * 2 * 1),
-pow(2.0 * M_PI, 6) / (6 * 5 * 4 * 3 * 2 * 1),
pow(2.0 * M_PI, 8) / (8 * 7 * 6 * 5 * 4 * 3 * 2 * 1),
-pow(2.0 * M_PI, 10) / (10 * 9 * 8 * 7 * 6 * 5 * 4 * 3 * 2 * 1),
};
struct qreg scaled_x =
qir_FMUL(c, src,
qir_uniform_f(c, 1.0f / (M_PI * 2.0f)));
struct qreg x_frac = qir_FADD(c,
ntq_ffract(c, scaled_x),
qir_uniform_f(c, -0.5));
struct qreg x_frac = ntq_shrink_sincos_input_range(c, src);
struct qreg sum = qir_uniform_f(c, coeff[0]);
struct qreg x2 = qir_FMUL(c, x_frac, x_frac);
struct qreg x = x2; /* Current x^2, x^4, or x^6 */
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment