Commit b3517a4d authored by David Schleef

sse: handle NANs and denormals

Set the DAZ and FTZ flags in the MXCSR, to set proper
denormal behavior.  Implement NAN handling for maxf and
minf.
parent 631d097e
......@@ -593,6 +593,8 @@ orc_compiler_sse_assemble (OrcCompiler *compiler)
orc_x86_emit_prologue (compiler);
orc_sse_set_mxcsr (compiler);
sse_load_constants_outer (compiler);
if (compiler->program->is_2d) {
......@@ -746,6 +748,7 @@ orc_compiler_sse_assemble (OrcCompiler *compiler)
sse_save_accumulators (compiler);
orc_sse_restore_mxcsr (compiler);
orc_x86_emit_epilogue (compiler);
orc_x86_do_fixups (compiler);
......
......@@ -74,7 +74,9 @@ typedef void (*OrcExecutorFunc)(OrcExecutor *ex);
/* Target flag bits.
 *
 * The ORC_TARGET_C_* values use the low bits; the two floating-point
 * behaviour flags are placed in the two highest bits of the word.
 */
enum {
  ORC_TARGET_C_C99 = (1<<0),
  ORC_TARGET_C_BARE = (1<<1),
  ORC_TARGET_C_NOEXEC = (1<<2),
  /* When set, the SSE minf/maxf rules emit a single minps/maxps instead
   * of the longer sequence that ORs both operand orders together. */
  ORC_TARGET_FAST_NAN = (1<<30),
  /* NOTE(review): no user of this flag is visible here; presumably it
   * selects relaxed denormal handling -- confirm against the callers.
   * The value sets the sign bit of int. */
  ORC_TARGET_FAST_DENORMAL = (1<<31)
};
enum {
......
......@@ -955,10 +955,54 @@ BINARY_F(addf, "addps", 0x58)
BINARY_F(subf, "subps", 0x5c)
BINARY_F(mulf, "mulps", 0x59)
BINARY_F(divf, "divps", 0x5e)
/* NOTE(review): sse_rule_maxf and sse_rule_minf are also written out by
 * hand right after this list.  If BINARY_F(name, ...) expands to a function
 * called sse_rule_<name> (the macro is not visible here -- confirm), the
 * next two lines are the pre-change versions and have to be dropped to
 * avoid a duplicate definition. */
BINARY_F(maxf, "maxps", 0x5f)
BINARY_F(minf, "minps", 0x5d)
UNARY_F(sqrtf, "sqrtps", 0x51)
/* Emit SSE code for the "minf" opcode.
 *
 * p:    compiler state; supplies the register allocation of each variable,
 *       the scratch SSE register (tmpreg) and the target flags.
 * user: not used by this rule.
 * insn: instruction being compiled; only src_args[1] and dest_args[0] are
 *       read (presumably dest_args[0] shares its register with src_args[0]
 *       -- confirm against the rule framework).
 *
 * With ORC_TARGET_FAST_NAN set a single minps is emitted.  Otherwise minps
 * is emitted once in each operand order and the two results are OR-ed
 * together.  minps hands back its second source operand whenever either
 * input is a NAN, so one order alone would drop a NAN held in the other
 * operand; the OR of both orders keeps it.
 */
static void
sse_rule_minf (OrcCompiler *p, void *user, OrcInstruction *insn)
{
  const int arg_reg = p->vars[insn->src_args[1]].alloc;
  const int dest_reg = p->vars[insn->dest_args[0]].alloc;

  if (p->target_flags & ORC_TARGET_FAST_NAN) {
    orc_sse_emit_0f (p, "minps", 0x5d, arg_reg, dest_reg);
    return;
  }

  /* tmpreg = minps with the operands in one order ... */
  orc_sse_emit_movdqa (p, arg_reg, p->tmpreg);
  orc_sse_emit_0f (p, "minps", 0x5d, dest_reg, p->tmpreg);
  /* ... dest = minps with the operands in the other order ... */
  orc_sse_emit_0f (p, "minps", 0x5d, arg_reg, dest_reg);
  /* ... and merge the two results. */
  orc_sse_emit_por (p, p->tmpreg, dest_reg);
}
/* Emit SSE code for the "maxf" opcode.
 *
 * p:    compiler state; supplies the register allocation of each variable,
 *       the scratch SSE register (tmpreg) and the target flags.
 * user: not used by this rule.
 * insn: instruction being compiled; only src_args[1] and dest_args[0] are
 *       read (presumably dest_args[0] shares its register with src_args[0]
 *       -- confirm against the rule framework).
 *
 * With ORC_TARGET_FAST_NAN set a single maxps is emitted.  Otherwise maxps
 * is emitted once in each operand order and the two results are OR-ed
 * together.  maxps hands back its second source operand whenever either
 * input is a NAN, so one order alone would drop a NAN held in the other
 * operand; the OR of both orders keeps it.
 */
static void
sse_rule_maxf (OrcCompiler *p, void *user, OrcInstruction *insn)
{
  const int arg_reg = p->vars[insn->src_args[1]].alloc;
  const int dest_reg = p->vars[insn->dest_args[0]].alloc;

  if (p->target_flags & ORC_TARGET_FAST_NAN) {
    orc_sse_emit_0f (p, "maxps", 0x5f, arg_reg, dest_reg);
    return;
  }

  /* tmpreg = maxps with the operands in one order ... */
  orc_sse_emit_movdqa (p, arg_reg, p->tmpreg);
  orc_sse_emit_0f (p, "maxps", 0x5f, dest_reg, p->tmpreg);
  /* ... dest = maxps with the operands in the other order ... */
  orc_sse_emit_0f (p, "maxps", 0x5f, arg_reg, dest_reg);
  /* ... and merge the two results. */
  orc_sse_emit_por (p, p->tmpreg, dest_reg);
}
static void
sse_rule_cmpeqf (OrcCompiler *p, void *user, OrcInstruction *insn)
{
......
......@@ -275,3 +275,60 @@ void orc_x86_emit_mov_sse_reg (OrcCompiler *compiler, int reg1, int reg2)
orc_x86_emit_modrm_reg (compiler, reg2, reg1);
}
/* Emit code that saves the current MXCSR and then loads a copy of it with
 * the FTZ and DAZ bits switched on.
 *
 * Two executor slots, addressed relative to compiler->exec_reg, are used:
 *   params[ORC_VAR_A4]  scratch slot for stmxcsr / ldmxcsr
 *   params[ORC_VAR_C1]  keeps the unmodified value, which
 *                       orc_sse_restore_mxcsr later reloads
 * compiler->gp_tmpreg is overwritten by the emitted code.
 *
 * NOTE(review): nothing here checks that the CPU implements DAZ; loading an
 * MXCSR with an unsupported bit set is expected to fault -- confirm whether
 * a capability check is needed.
 */
void
orc_sse_set_mxcsr (OrcCompiler *compiler)
{
  /* FTZ (bit 15) | DAZ (bit 6) */
  const int mxcsr_bits = 0x8000 | 0x0040;
  const int scratch_off =
      (int)ORC_STRUCT_OFFSET(OrcExecutor,params[ORC_VAR_A4]);
  const int saved_off =
      (int)ORC_STRUCT_OFFSET(OrcExecutor,params[ORC_VAR_C1]);
  int shift;

  /* stmxcsr scratch(exec): 0f ae /3 */
  ORC_ASM_CODE(compiler," stmxcsr %d(%%%s)\n", scratch_off,
      orc_x86_get_regname(compiler->exec_reg));
  *compiler->codeptr++ = 0x0f;
  *compiler->codeptr++ = 0xae;
  orc_x86_emit_modrm_memoffset (compiler, 3, scratch_off,
      compiler->exec_reg);

  /* Pull the stored value into gp_tmpreg and park a pristine copy. */
  orc_x86_emit_mov_memoffset_reg (compiler, 4, scratch_off,
      compiler->exec_reg, compiler->gp_tmpreg);
  orc_x86_emit_mov_reg_memoffset (compiler, 4, compiler->gp_tmpreg,
      saved_off, compiler->exec_reg);

  /* orl $imm32, gp_tmpreg: 0x81 /1 followed by a little-endian imm32 */
  ORC_ASM_CODE(compiler," orl $%d, %%%s\n", mxcsr_bits,
      orc_x86_get_regname(compiler->gp_tmpreg));
  orc_x86_emit_rex(compiler, 4, 0, 0, compiler->gp_tmpreg);
  *compiler->codeptr++ = 0x81;
  orc_x86_emit_modrm_reg (compiler, compiler->gp_tmpreg, 1);
  for (shift = 0; shift < 32; shift += 8) {
    *compiler->codeptr++ = ((mxcsr_bits >> shift) & 0xff);
  }

  /* Write the modified value back to the scratch slot ... */
  orc_x86_emit_mov_reg_memoffset (compiler, 4, compiler->gp_tmpreg,
      scratch_off, compiler->exec_reg);

  /* ... and load it: ldmxcsr scratch(exec), 0f ae /2 */
  ORC_ASM_CODE(compiler," ldmxcsr %d(%%%s)\n", scratch_off,
      orc_x86_get_regname(compiler->exec_reg));
  *compiler->codeptr++ = 0x0f;
  *compiler->codeptr++ = 0xae;
  orc_x86_emit_modrm_memoffset (compiler, 2, scratch_off,
      compiler->exec_reg);
}
/* Emit an ldmxcsr (0f ae /2) that reloads MXCSR from the executor slot
 * params[ORC_VAR_C1], addressed relative to compiler->exec_reg.  That is
 * the slot in which orc_sse_set_mxcsr stores the unmodified MXCSR value.
 */
void
orc_sse_restore_mxcsr (OrcCompiler *compiler)
{
  const int saved_off =
      (int)ORC_STRUCT_OFFSET(OrcExecutor,params[ORC_VAR_C1]);

  ORC_ASM_CODE(compiler," ldmxcsr %d(%%%s)\n", saved_off,
      orc_x86_get_regname(compiler->exec_reg));
  *compiler->codeptr++ = 0x0f;
  *compiler->codeptr++ = 0xae;
  orc_x86_emit_modrm_memoffset (compiler, 2, saved_off, compiler->exec_reg);
}
......@@ -67,6 +67,9 @@ void orc_sse_emit_pshuflw (OrcCompiler *p, int shuf, int src, int dest);
void orc_sse_emit_shiftimm (OrcCompiler *p, const char *insn_name,
int code, int modrm_code, int shift, int reg);
/* Emits code that stores the current MXCSR in the executor
 * (params[ORC_VAR_C1]) and then loads a copy with 0x8040 (the FTZ and DAZ
 * bits) OR-ed in.  The emitted code overwrites compiler->gp_tmpreg and
 * uses params[ORC_VAR_A4] as scratch. */
void orc_sse_set_mxcsr (OrcCompiler *compiler);
/* Emits an ldmxcsr from params[ORC_VAR_C1], i.e. the value stored there by
 * the code from orc_sse_set_mxcsr. */
void orc_sse_restore_mxcsr (OrcCompiler *compiler);
unsigned int orc_sse_get_cpu_flags (void);
/* SSE instructions */
......
......@@ -897,6 +897,7 @@ orc_x86_emit_prologue (OrcCompiler *compiler)
orc_x86_emit_push (compiler, 4, X86_EBX);
}
}
}
void
......
......@@ -43,6 +43,7 @@ void orc_x86_emit_add_reg_memoffset (OrcCompiler *compiler, int size, int reg1,
void orc_x86_emit_and_imm_memoffset (OrcCompiler *compiler, int size, int value, int offset, int reg);
void orc_x86_emit_add_imm_reg (OrcCompiler *compiler, int size, int value, int reg, orc_bool record);
void orc_x86_emit_and_imm_reg (OrcCompiler *compiler, int size, int value, int reg);
/* NOTE(review): no definition of orc_x86_emit_or_imm_reg is visible in
 * this change, and orc_sse_set_mxcsr open-codes its own
 * "orl $imm32, reg" instead of calling it -- confirm the function is
 * implemented somewhere before relying on this prototype. */
void orc_x86_emit_or_imm_reg (OrcCompiler *compiler, int size, int value, int reg);
void orc_x86_emit_add_reg_reg (OrcCompiler *compiler, int size, int reg1, int reg2);
void orc_x86_emit_sub_reg_reg (OrcCompiler *compiler, int size, int reg1, int reg2);
void orc_x86_emit_imul_memoffset_reg (OrcCompiler *compiler, int size,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment