Commit 639bcc01 authored by David Schleef's avatar David Schleef

Remove orc-float

parent 7cbbc53a
pkgincludedir = $(includedir)/orc-@ORC_MAJORMINOR@/orc-float
lib_LTLIBRARIES = liborc-float-@ORC_MAJORMINOR@.la
liborc_float_@ORC_MAJORMINOR@_la_LIBADD = $(ORC_LIBS)
liborc_float_@ORC_MAJORMINOR@_la_LDFLAGS = \
-no-undefined -export-symbols-regex 'orc_'
liborc_float_@ORC_MAJORMINOR@_la_CFLAGS = $(ORC_CFLAGS)
liborc_float_@ORC_MAJORMINOR@_la_SOURCES = \
orcfloat.c orcfloat-sse.c \
orcfloat-neon.c
pkginclude_HEADERS = \
orcfloat.h
orc-float
=========
See the notes about orc-pixel.
Specifying floating point parameters requires you to use the integer
that corresponds to the bit pattern of the floating point number you
want.
The SSE backend generates nearly correct output; however, it is not
bit-exact with the C code for some operations. Of course, it's
notoriously difficult to get bit-exact floating point code in C.
#include "config.h"
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <orc/orcprogram.h>
#include <orc/orcarm.h>
#include <orc/orcdebug.h>
#include <orc/orcneon.h>
#if 0
/*
 * Map a vector register id to its "dN" assembler name.
 *
 * Ids outside [ORC_VEC_REG_BASE, ORC_VEC_REG_BASE+32) yield "ERROR";
 * otherwise the low five bits of the id select one of d0..d31.
 * This definition sits inside an #if 0 region and is not compiled.
 */
const char *orc_neon_reg_name (int reg)
{
  static const char *d_names[] = {
    "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
    "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
    "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
    "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31",
  };
  const int in_range =
      (reg >= ORC_VEC_REG_BASE) && (reg < ORC_VEC_REG_BASE+32);

  return in_range ? d_names[reg & 0x1f] : "ERROR";
}
/*
 * Map a vector register id to its "qN" assembler name.
 *
 * Only even slots name a quad register: slot 2k (the low five bits of the
 * id) maps to "qk". Odd slots and ids outside
 * [ORC_VEC_REG_BASE, ORC_VEC_REG_BASE+32) yield "ERROR".
 * This definition sits inside an #if 0 region and is not compiled.
 */
const char *orc_neon_reg_name_quad (int reg)
{
  static const char *q_names[] = {
    "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
    "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15",
  };
  int slot;

  if (reg < ORC_VEC_REG_BASE || reg >= ORC_VEC_REG_BASE+32) {
    return "ERROR";
  }

  slot = reg & 0x1f;
  if (slot & 1) {
    return "ERROR";
  }
  return q_names[slot >> 1];
}
#endif
/*
 * Map a vector register id to an "sN" assembler name.
 *
 * Slot k (the low five bits of the id) in 0..15 maps to "s<2k>", i.e.
 * s0, s2, ..., s30. Slots 16..31 and ids outside
 * [ORC_VEC_REG_BASE, ORC_VEC_REG_BASE+32) yield "ERROR".
 */
const char *orc_neon_reg_name_s (int reg)
{
  static const char *s_names[] = {
    "s0", "s2", "s4", "s6", "s8", "s10", "s12", "s14",
    "s16", "s18", "s20", "s22", "s24", "s26", "s28", "s30",
  };
  int slot;

  if (reg < ORC_VEC_REG_BASE || reg >= ORC_VEC_REG_BASE+32) {
    return "ERROR";
  }

  slot = reg & 0x1f;
  return (slot < 16) ? s_names[slot] : "ERROR";
}
/*
 * UNARY(opcode, insn_name, code)
 *
 * Defines orc_neon_rule_<opcode>, a rule for a one-source instruction.
 * The rule prints "<insn_name> dest, src" using orc_neon_reg_name() and
 * emits `code` with the allocated register numbers merged in:
 *   dest: low four bits at bit 12, fifth bit at bit 22
 *   src:  low four bits at bit 0,  fifth bit at bit 5
 */
#define UNARY(opcode,insn_name,code) \
static void \
orc_neon_rule_ ## opcode (OrcCompiler *p, void *user, OrcInstruction *insn) \
{ \
  const int rd = p->vars[insn->dest_args[0]].alloc; \
  const int rm = p->vars[insn->src_args[0]].alloc; \
  uint32_t word; \
  ORC_ASM_CODE(p," " insn_name " %s, %s\n", \
      orc_neon_reg_name (rd), \
      orc_neon_reg_name (rm)); \
  word = code; \
  word |= ((rd & 0xf) << 12) | (((rd >> 4) & 0x1) << 22); \
  word |= ((rm & 0xf) << 0) | (((rm >> 4) & 0x1) << 5); \
  orc_arm_emit (p, word); \
}
/*
 * UNARY_S(opcode, insn_name, code)
 *
 * Same as UNARY, except that the assembly listing names the registers with
 * orc_neon_reg_name_s(). The emitted word is built identically:
 *   dest: low four bits at bit 12, fifth bit at bit 22
 *   src:  low four bits at bit 0,  fifth bit at bit 5
 */
#define UNARY_S(opcode,insn_name,code) \
static void \
orc_neon_rule_ ## opcode (OrcCompiler *p, void *user, OrcInstruction *insn) \
{ \
  const int rd = p->vars[insn->dest_args[0]].alloc; \
  const int rm = p->vars[insn->src_args[0]].alloc; \
  uint32_t word; \
  ORC_ASM_CODE(p," " insn_name " %s, %s\n", \
      orc_neon_reg_name_s (rd), \
      orc_neon_reg_name_s (rm)); \
  word = code; \
  word |= ((rd & 0xf) << 12) | (((rd >> 4) & 0x1) << 22); \
  word |= ((rm & 0xf) << 0) | (((rm >> 4) & 0x1) << 5); \
  orc_arm_emit (p, word); \
}
/*
 * BINARY(opcode, insn_name, code)
 *
 * Defines orc_neon_rule_<opcode>, a rule for a two-source instruction.
 * The rule prints "<insn_name> dest, src0, src1" using orc_neon_reg_name()
 * and emits `code` with the allocated register numbers merged in:
 *   dest: low four bits at bit 16, fifth bit at bit 7
 *   src0: low four bits at bit 12, fifth bit at bit 22
 *   src1: low four bits at bit 0,  fifth bit at bit 5
 *
 * NOTE(review): UNARY and BINARY_R place dest in the bit-12/22 field,
 * whereas this macro places it in the bit-16/7 field. That is harmless only
 * if dest and src0 are always allocated to the same register -- confirm.
 */
#define BINARY(opcode,insn_name,code) \
static void \
orc_neon_rule_ ## opcode (OrcCompiler *p, void *user, OrcInstruction *insn) \
{ \
  const int rd = p->vars[insn->dest_args[0]].alloc; \
  const int ra = p->vars[insn->src_args[0]].alloc; \
  const int rb = p->vars[insn->src_args[1]].alloc; \
  uint32_t word; \
  ORC_ASM_CODE(p," " insn_name " %s, %s, %s\n", \
      orc_neon_reg_name (rd), \
      orc_neon_reg_name (ra), \
      orc_neon_reg_name (rb)); \
  word = code; \
  word |= ((rd & 0xf) << 16) | (((rd >> 4) & 0x1) << 7); \
  word |= ((ra & 0xf) << 12) | (((ra >> 4) & 0x1) << 22); \
  word |= ((rb & 0xf) << 0) | (((rb >> 4) & 0x1) << 5); \
  orc_arm_emit (p, word); \
}
/*
 * BINARY_S(opcode, insn_name, code)
 *
 * Same as BINARY, except that the assembly listing names the registers
 * with orc_neon_reg_name_s(). The emitted word is built identically:
 *   dest: low four bits at bit 16, fifth bit at bit 7
 *   src0: low four bits at bit 12, fifth bit at bit 22
 *   src1: low four bits at bit 0,  fifth bit at bit 5
 *
 * NOTE(review): dest lands in the bit-16/7 field here but in the bit-12/22
 * field in UNARY_S; confirm dest and src0 always share a register.
 */
#define BINARY_S(opcode,insn_name,code) \
static void \
orc_neon_rule_ ## opcode (OrcCompiler *p, void *user, OrcInstruction *insn) \
{ \
  const int rd = p->vars[insn->dest_args[0]].alloc; \
  const int ra = p->vars[insn->src_args[0]].alloc; \
  const int rb = p->vars[insn->src_args[1]].alloc; \
  uint32_t word; \
  ORC_ASM_CODE(p," " insn_name " %s, %s, %s\n", \
      orc_neon_reg_name_s (rd), \
      orc_neon_reg_name_s (ra), \
      orc_neon_reg_name_s (rb)); \
  word = code; \
  word |= ((rd & 0xf) << 16) | (((rd >> 4) & 0x1) << 7); \
  word |= ((ra & 0xf) << 12) | (((ra >> 4) & 0x1) << 22); \
  word |= ((rb & 0xf) << 0) | (((rb >> 4) & 0x1) << 5); \
  orc_arm_emit (p, word); \
}
/*
 * BINARY_R(opcode, insn_name, code)
 *
 * Two-source rule with the source fields swapped relative to BINARY. The
 * listing still prints "<insn_name> dest, src0, src1", but the emitted
 * word is `code` merged with:
 *   src1: low four bits at bit 16, fifth bit at bit 7
 *   dest: low four bits at bit 12, fifth bit at bit 22
 *   src0: low four bits at bit 0,  fifth bit at bit 5
 */
#define BINARY_R(opcode,insn_name,code) \
static void \
orc_neon_rule_ ## opcode (OrcCompiler *p, void *user, OrcInstruction *insn) \
{ \
  const int rd = p->vars[insn->dest_args[0]].alloc; \
  const int ra = p->vars[insn->src_args[0]].alloc; \
  const int rb = p->vars[insn->src_args[1]].alloc; \
  uint32_t word; \
  ORC_ASM_CODE(p," " insn_name " %s, %s, %s\n", \
      orc_neon_reg_name (rd), \
      orc_neon_reg_name (ra), \
      orc_neon_reg_name (rb)); \
  word = code; \
  word |= ((rb & 0xf) << 16) | (((rb >> 4) & 0x1) << 7); \
  word |= ((rd & 0xf) << 12) | (((rd >> 4) & 0x1) << 22); \
  word |= ((ra & 0xf) << 0) | (((ra >> 4) & 0x1) << 5); \
  orc_arm_emit (p, word); \
}
/*
 * Instantiate the NEON rule functions. Each line defines
 * orc_neon_rule_<opcode>: the string is the mnemonic printed in the
 * assembly listing and the constant is the base instruction word into
 * which the macro ORs the register fields.
 */
UNARY(invf,"vrecpe.f32",0xf3bb0500)
UNARY(invsqrtf,"vrsqrte.f32",0xf3bb0580)
BINARY(addg,"vadd.f64",0xee300b00)
BINARY(subg,"vsub.f64",0xee300b40)
BINARY(mulg,"vmul.f64",0xee200b00)
BINARY(divg,"vdiv.f64",0xee800b00)
UNARY(sqrtg,"vsqrt.f64",0xeeb10bc0)
/* Disabled: no rule functions are generated for the opcodes below. */
//BINARY(cmpeqg,"fcmped",0xf2000e00)
//BINARY(cmpltg,"fcmpltd",0xf6400e00)
//BINARY(cmpleg,"fcmpled",0xf3000e00)
//UNARY(convgl,"vcvt.s32.f64",0xf3bb0700)
//UNARY(convlg,"vcvt.f64.s32",0xf3bb0600)
/*
 * Create a rule set that binds the "float" opcode set to the "neon" target
 * (required flags 0) and register the generated orc_neon_rule_* functions
 * in it.
 *
 * NOTE(review): the results of orc_opcode_set_get() and
 * orc_target_get_by_name() are passed on unchecked -- confirm both lookups
 * always succeed by the time this runs.
 */
void
orc_float_neon_register_rules (void)
{
OrcRuleSet *rule_set;
rule_set = orc_rule_set_new (orc_opcode_set_get("float"),
orc_target_get_by_name("neon"), 0);
/* REG(x) registers orc_neon_rule_<x> under the opcode name "x".
 * The macro is not #undef'd, so it stays defined after this function. */
#define REG(x) \
orc_rule_register (rule_set, #x , orc_neon_rule_ ## x, NULL)
REG(invf);
REG(invsqrtf);
REG(addg);
REG(subg);
REG(mulg);
REG(divg);
REG(sqrtg);
/* Disabled: the matching rule functions are not generated. */
//REG(cmpeqg);
//REG(cmpltg);
//REG(cmpleg);
//REG(convgl);
//REG(convlg);
}
#include <orc-float/orcfloat.h>
#include <orc/orc.h>
#include <orc/orcdebug.h>
#include <orc/orcsse.h>
#include <stdlib.h>
/*
 * Pack three fields into one byte-sized value:
 *   mod (2 bits) -> bits 7:6, reg (3 bits) -> bits 5:3, rm (3 bits) -> bits 2:0.
 * Note the parameter order is (mod, rm, reg). Each argument is masked to
 * its field width before being placed.
 */
#define X86_MODRM(mod, rm, reg) \
  ((((mod) & 3) << 6) | (((reg) & 7) << 3) | (((rm) & 7) << 0))
/*
 * UNARY(opcode, insn_name, code)
 *
 * Defines sse_rule_<opcode>, which hands `insn_name` and `code` to
 * orc_sse_emit_0f() together with the register allocated to the first
 * source operand and the register allocated to the destination.
 */
#define UNARY(opcode,insn_name,code) \
static void \
sse_rule_ ## opcode (OrcCompiler *p, void *user, OrcInstruction *insn) \
{ \
  const int src_reg = p->vars[insn->src_args[0]].alloc; \
  const int dest_reg = p->vars[insn->dest_args[0]].alloc; \
  orc_sse_emit_0f (p, insn_name, code, src_reg, dest_reg); \
}
/*
 * BINARY(opcode, insn_name, code)
 *
 * Defines sse_rule_<opcode>, which hands `insn_name` and `code` to
 * orc_sse_emit_0f() together with the register allocated to the SECOND
 * source operand and the register allocated to the destination. The first
 * source operand is not referenced.
 * NOTE(review): presumably the first source shares the destination
 * register -- confirm.
 */
#define BINARY(opcode,insn_name,code) \
static void \
sse_rule_ ## opcode (OrcCompiler *p, void *user, OrcInstruction *insn) \
{ \
  const int src_reg = p->vars[insn->src_args[1]].alloc; \
  const int dest_reg = p->vars[insn->dest_args[0]].alloc; \
  orc_sse_emit_0f (p, insn_name, code, src_reg, dest_reg); \
}
/* Single-source rules emitted through orc_sse_emit_0f():
 * define sse_rule_invf and sse_rule_invsqrtf. */
UNARY(invf, "rcpps", 0x53)
UNARY(invsqrtf, "rsqrtps", 0x52)
/*
 * UNARY_66(opcode, insn_name, code)
 *
 * Like UNARY, but the instruction is emitted through orc_sse_emit_660f():
 * first source register in, destination register out.
 */
#define UNARY_66(opcode,insn_name,code) \
static void \
sse_rule_ ## opcode (OrcCompiler *p, void *user, OrcInstruction *insn) \
{ \
  const int src_reg = p->vars[insn->src_args[0]].alloc; \
  const int dest_reg = p->vars[insn->dest_args[0]].alloc; \
  orc_sse_emit_660f (p, insn_name, code, src_reg, dest_reg); \
}
/*
 * BINARY_66(opcode, insn_name, code)
 *
 * Like BINARY, but the instruction is emitted through orc_sse_emit_660f().
 * Only the second source register and the destination register are
 * passed; the first source operand is not referenced.
 */
#define BINARY_66(opcode,insn_name,code) \
static void \
sse_rule_ ## opcode (OrcCompiler *p, void *user, OrcInstruction *insn) \
{ \
  const int src_reg = p->vars[insn->src_args[1]].alloc; \
  const int dest_reg = p->vars[insn->dest_args[0]].alloc; \
  orc_sse_emit_660f (p, insn_name, code, src_reg, dest_reg); \
}
/* Two-source double-precision rules emitted through orc_sse_emit_660f():
 * each line defines sse_rule_<opcode>. */
BINARY_66(addg, "addpd", 0x58)
BINARY_66(subg, "subpd", 0x5c)
BINARY_66(mulg, "mulpd", 0x59)
BINARY_66(divg, "divpd", 0x5e)
BINARY_66(maxg, "maxpd", 0x5f)
BINARY_66(ming, "minpd", 0x5d)
#if 0
/* These don't actually exist */
/* NOTE(review): SQRTPD is a real SSE2 instruction (66 0F 51); only the
 * "rcppd" and "rsqrtpd" forms have no hardware counterpart. Confirm
 * whether sqrtg was meant to stay disabled. */
UNARY_66(invg, "rcppd", 0x53)
UNARY_66(sqrtg, "sqrtpd", 0x51)
UNARY_66(invsqrtg, "rsqrtpd", 0x52)
#endif
/*
 * Rule for "cmpeqg": emits "cmpeqpd" (code 0xc2) through
 * orc_sse_emit_660f() using the second source register and the
 * destination register, then appends the byte 0x00 to the code stream.
 * The eq/lt/le rules differ only in the mnemonic and this trailing byte.
 */
static void
sse_rule_cmpeqg (OrcCompiler *p, void *user, OrcInstruction *insn)
{
  const int src_reg = p->vars[insn->src_args[1]].alloc;
  const int dest_reg = p->vars[insn->dest_args[0]].alloc;

  orc_sse_emit_660f (p, "cmpeqpd", 0xc2, src_reg, dest_reg);

  *p->codeptr = 0x00;
  p->codeptr++;
}
/*
 * Rule for "cmpltg": emits "cmpltpd" (code 0xc2) through
 * orc_sse_emit_660f() using the second source register and the
 * destination register, then appends the byte 0x01 to the code stream.
 * The eq/lt/le rules differ only in the mnemonic and this trailing byte.
 */
static void
sse_rule_cmpltg (OrcCompiler *p, void *user, OrcInstruction *insn)
{
  const int src_reg = p->vars[insn->src_args[1]].alloc;
  const int dest_reg = p->vars[insn->dest_args[0]].alloc;

  orc_sse_emit_660f (p, "cmpltpd", 0xc2, src_reg, dest_reg);

  *p->codeptr = 0x01;
  p->codeptr++;
}
/*
 * Rule for "cmpleg": emits "cmplepd" (code 0xc2) through
 * orc_sse_emit_660f() using the second source register and the
 * destination register, then appends the byte 0x02 to the code stream.
 * The eq/lt/le rules differ only in the mnemonic and this trailing byte.
 */
static void
sse_rule_cmpleg (OrcCompiler *p, void *user, OrcInstruction *insn)
{
  const int src_reg = p->vars[insn->src_args[1]].alloc;
  const int dest_reg = p->vars[insn->dest_args[0]].alloc;

  orc_sse_emit_660f (p, "cmplepd", 0xc2, src_reg, dest_reg);

  *p->codeptr = 0x02;
  p->codeptr++;
}
/*
 * Rule for "convgl": emits "cvtpd2dq" (code 0xe6) through
 * orc_sse_emit_f20f(), from the first source register into the
 * destination register.
 */
static void
sse_rule_convgl (OrcCompiler *p, void *user, OrcInstruction *insn)
{
  const int src_reg = p->vars[insn->src_args[0]].alloc;
  const int dest_reg = p->vars[insn->dest_args[0]].alloc;

  orc_sse_emit_f20f (p, "cvtpd2dq", 0xe6, src_reg, dest_reg);
}
/*
 * Rule for "convlg": emits "cvtdq2pd" (code 0xe6) through
 * orc_sse_emit_f30f(), from the first source register into the
 * destination register.
 */
static void
sse_rule_convlg (OrcCompiler *p, void *user, OrcInstruction *insn)
{
  const int src_reg = p->vars[insn->src_args[0]].alloc;
  const int dest_reg = p->vars[insn->dest_args[0]].alloc;

  orc_sse_emit_f30f (p, "cvtdq2pd", 0xe6, src_reg, dest_reg);
}
/*
 * Rule for "convgf": emits "cvtpd2ps" (code 0x5a) through
 * orc_sse_emit_660f(), from the first source register into the
 * destination register.
 */
static void
sse_rule_convgf (OrcCompiler *p, void *user, OrcInstruction *insn)
{
  const int src_reg = p->vars[insn->src_args[0]].alloc;
  const int dest_reg = p->vars[insn->dest_args[0]].alloc;

  orc_sse_emit_660f (p, "cvtpd2ps", 0x5a, src_reg, dest_reg);
}
/*
 * Rule for "convfg": emits "cvtps2pd" (code 0x5a) through
 * orc_sse_emit_0f(), from the first source register into the
 * destination register.
 */
static void
sse_rule_convfg (OrcCompiler *p, void *user, OrcInstruction *insn)
{
  const int src_reg = p->vars[insn->src_args[0]].alloc;
  const int dest_reg = p->vars[insn->dest_args[0]].alloc;

  orc_sse_emit_0f (p, "cvtps2pd", 0x5a, src_reg, dest_reg);
}
void
orc_float_sse_register_rules (OrcTarget *target)
{
OrcRuleSet *rule_set;
rule_set = orc_rule_set_new (orc_opcode_set_get("float"),
orc_target_get_by_name ("sse"), ORC_TARGET_SSE_SSE2);
orc_rule_register (rule_set, "invf", sse_rule_invf, NULL);
orc_rule_register (rule_set, "invsqrtf", sse_rule_invsqrtf, NULL);
orc_rule_register (rule_set, "addg", sse_rule_addg, NULL);
orc_rule_register (rule_set, "subg", sse_rule_subg, NULL);
orc_rule_register (rule_set, "mulg", sse_rule_mulg, NULL);
orc_rule_register (rule_set, "divg", sse_rule_divg, NULL);
orc_rule_register (rule_set, "ming", sse_rule_ming, NULL);
orc_rule_register (rule_set, "maxg", sse_rule_maxg, NULL);
#if 0
/* These don't actually exist */
orc_rule_register (rule_set, "invg", sse_rule_invg, NULL);
orc_rule_register (rule_set, "sqrtg", sse_rule_sqrtg, NULL);
orc_rule_register (rule_set, "invsqrtg", sse_rule_invsqrtg, NULL);
#endif
orc_rule_register (rule_set, "cmpeqg", sse_rule_cmpeqg, NULL);
orc_rule_register (rule_set, "cmpltg", sse_rule_cmpltg, NULL);
orc_rule_register (rule_set, "cmpleg", sse_rule_cmpleg, NULL);
orc_rule_register (rule_set, "convgl", sse_rule_convgl, NULL);
orc_rule_register (rule_set, "convlg", sse_rule_convlg, NULL);
orc_rule_register (rule_set, "convgf", sse_rule_convgf, NULL);
orc_rule_register (rule_set, "convfg", sse_rule_convfg, NULL);
}
#include <orc-float/orcfloat.h>
#include <orc/orc.h>
#include <orc/orcdebug.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
/* This should be static, but compilers can't agree on what to use
* for forward declarations of static arrays. */
OrcStaticOpcode opcodes[];
/* Backend rule registration entry points; both are called from
 * orc_float_init() with no arguments. */
void orc_float_sse_register_rules (void);
void orc_float_neon_register_rules (void);
/*
 * Public entry point of the orc-float library.
 *
 * Calls orc_init(), registers the static opcode table under the set name
 * "float", and then registers the SSE and NEON rules for that set. The
 * opcode set is registered first because both rule registration functions
 * look it up by name.
 * NOTE(review): both backends are registered unconditionally -- confirm
 * that each target exists in every build configuration.
 */
void
orc_float_init (void)
{
orc_init ();
orc_opcode_register_static (opcodes, "float");
orc_float_sse_register_rules ();
orc_float_neon_register_rules ();
}
/*
 * Reinterpret the bytes stored at `addr` as a float and return it.
 *
 * The bytes are copied with memcpy() instead of being loaded through a
 * cast pointer, so the read is well defined whatever the alignment or
 * declared type of the underlying storage.
 */
static float
ORC_FLOAT_READ(void *addr)
{
  float value;

  memcpy (&value, addr, sizeof value);
  return value;
}
/*
 * Store the object representation of `value` into the bytes at `addr`.
 *
 * Exactly sizeof(float) bytes are written. memcpy() is used instead of a
 * store through a cast pointer, so the write is well defined whatever the
 * alignment or declared type of the underlying storage.
 */
static void
ORC_FLOAT_WRITE(void *addr, float value)
{
  memcpy (addr, &value, sizeof value);
}
//#define ORC_FLOAT_READ(addr) (*(float *)(addr))
//#define ORC_FLOAT_WRITE(addr,value) do{ (*(float *)(addr)) = (value); }while(0)
/*
 * UNARY_F(name, code)
 *
 * Defines the emulation function `name` for a one-source float opcode.
 * `code` is an expression over the float local `a`, which holds source
 * slot 0; its value is stored as a float into destination slot 0.
 */
#define UNARY_F(name,code) \
static void \
name (OrcOpcodeExecutor *ex, void *user) \
{ \
  float a = ORC_FLOAT_READ(&ex->src_values[0]); \
  float out_value = (code); \
  ORC_FLOAT_WRITE(&ex->dest_values[0], out_value); \
}
/*
 * BINARY_F(name, code)
 *
 * Defines the emulation function `name` for a two-source float opcode.
 * `code` is an expression over the float locals `a` (source slot 0) and
 * `b` (source slot 1); its value is stored as a float into destination
 * slot 0.
 */
#define BINARY_F(name,code) \
static void \
name (OrcOpcodeExecutor *ex, void *user) \
{ \
  float a = ORC_FLOAT_READ(&ex->src_values[0]); \
  float b = ORC_FLOAT_READ(&ex->src_values[1]); \
  float out_value = (code); \
  ORC_FLOAT_WRITE(&ex->dest_values[0], out_value); \
}
/*
 * BINARY_FL(name, code)
 *
 * Defines the emulation function `name` for a two-source float opcode
 * whose result is assigned directly to destination slot 0 rather than
 * being written through ORC_FLOAT_WRITE. `code` is an expression over the
 * float locals `a` (source slot 0) and `b` (source slot 1).
 */
#define BINARY_FL(name,code) \
static void \
name (OrcOpcodeExecutor *ex, void *user) \
{ \
  void *first = &ex->src_values[0]; \
  void *second = &ex->src_values[1]; \
  float a = ORC_FLOAT_READ(first); \
  float b = ORC_FLOAT_READ(second); \
  ex->dest_values[0] = (code); \
}
/* Emulation functions for the single-precision opcodes: reciprocal and
 * reciprocal square root of the source value `a`. */
UNARY_F(invf, (1.0f/a) )
UNARY_F(invsqrtf, 1.0f/sqrtf(a))
/*
 * Reinterpret the bytes stored at `addr` as a double and return it.
 *
 * The bytes are copied with memcpy() instead of being loaded through a
 * cast pointer, so the read is well defined whatever the alignment or
 * declared type of the underlying storage.
 */
static double
ORC_DOUBLE_READ(void *addr)
{
  double value;

  memcpy (&value, addr, sizeof value);
  return value;
}
/*
 * Store the object representation of `value` into the bytes at `addr`.
 *
 * Exactly sizeof(double) bytes are written. memcpy() is used instead of a
 * store through a cast pointer, so the write is well defined whatever the
 * alignment or declared type of the underlying storage.
 */
static void
ORC_DOUBLE_WRITE(void *addr, double value)
{
  memcpy (addr, &value, sizeof value);
}
//#define ORC_DOUBLE_READ(addr) (*(double *)(void *)(addr))
//#define ORC_DOUBLE_WRITE(addr,value) do{ (*(double *)(void *)(addr)) = (value); }while(0)
/*
 * UNARY_G(name, code)
 *
 * Defines the emulation function `name` for a one-source double opcode.
 * `code` is an expression over the double local `a`, which holds source
 * slot 0; its value is stored as a double into destination slot 0.
 */
#define UNARY_G(name,code) \
static void \
name (OrcOpcodeExecutor *ex, void *user) \
{ \
  double a = ORC_DOUBLE_READ(&ex->src_values[0]); \
  double out_value = (code); \
  ORC_DOUBLE_WRITE(&ex->dest_values[0], out_value); \
}
/*
 * BINARY_G(name, code)
 *
 * Defines the emulation function `name` for a two-source double opcode.
 * `code` is an expression over the double locals `a` (source slot 0) and
 * `b` (source slot 1); its value is stored as a double into destination
 * slot 0.
 */
#define BINARY_G(name,code) \
static void \
name (OrcOpcodeExecutor *ex, void *user) \
{ \
  double a = ORC_DOUBLE_READ(&ex->src_values[0]); \
  double b = ORC_DOUBLE_READ(&ex->src_values[1]); \
  double out_value = (code); \
  ORC_DOUBLE_WRITE(&ex->dest_values[0], out_value); \
}
/*
 * BINARY_GQ(name, code)
 *
 * Defines the emulation function `name` for a two-source double opcode
 * whose result is assigned directly to destination slot 0 rather than
 * being written through ORC_DOUBLE_WRITE. `code` is an expression over
 * the double locals `a` (source slot 0) and `b` (source slot 1).
 */
#define BINARY_GQ(name,code) \
static void \
name (OrcOpcodeExecutor *ex, void *user) \
{ \
  void *first = &ex->src_values[0]; \
  void *second = &ex->src_values[1]; \
  double a = ORC_DOUBLE_READ(first); \
  double b = ORC_DOUBLE_READ(second); \
  ex->dest_values[0] = (code); \
}
/* Emulation functions for the double-precision opcodes; `a` and `b` are
 * the first and second source values. */
BINARY_G(addg, a + b)
BINARY_G(subg, a - b)
BINARY_G(mulg, a * b)
BINARY_G(divg, a / b)
UNARY_G(invg, (1.0f/a) )
UNARY_G(sqrtg, sqrt(a) )
BINARY_G(maxg, (a>b) ? a : b)
BINARY_G(ming, (a<b) ? a : b)
UNARY_G(invsqrtg, 1.0f/sqrt(a))
/* Comparisons assign ~0 to the destination slot when the relation holds
 * and 0 otherwise. */
BINARY_GQ(cmpeqg, (a == b) ? (~0) : 0)
BINARY_GQ(cmpltg, (a < b) ? (~0) : 0)
BINARY_GQ(cmpleg, (a <= b) ? (~0) : 0)
/*
 * Emulation of "convgl": read source slot 0 as a double and assign it to
 * destination slot 0, relying on C's implicit conversion from double to
 * the slot's type.
 */
static void
convgl (OrcOpcodeExecutor *ex, void *user)
{
  double input = ORC_DOUBLE_READ(&ex->src_values[0]);

  ex->dest_values[0] = input;
}
/*
 * Emulation of "convlg": convert the value held in source slot 0 to
 * double and store it into destination slot 0.
 */
static void
convlg (OrcOpcodeExecutor *ex, void *user)
{
  double widened = ex->src_values[0];

  ORC_DOUBLE_WRITE(&ex->dest_values[0], widened);
}
/*
 * Emulation of "convgf": read source slot 0 as a double, narrow it to
 * float, and store the float into destination slot 0.
 */
static void
convgf (OrcOpcodeExecutor *ex, void *user)
{
  double input = ORC_DOUBLE_READ(&ex->src_values[0]);
  float narrowed = input;

  ORC_FLOAT_WRITE(&ex->dest_values[0], narrowed);
}
/*
 * Emulation of "convfg": read source slot 0 as a float, widen it to
 * double, and store the double into destination slot 0.
 */
static void
convfg (OrcOpcodeExecutor *ex, void *user)
{
  float input = ORC_FLOAT_READ(&ex->src_values[0]);
  double widened = input;

  ORC_DOUBLE_WRITE(&ex->dest_values[0], widened);
}
/*
 * Static opcode table registered under the set name "float" by
 * orc_float_init(). Each row is:
 *   { opcode name, emulation function, NULL, flags, { ... }, { ... } }
 * NOTE(review): the two brace lists look like destination sizes followed
 * by source sizes in bytes (e.g. convgl reads a double and is listed as
 * { 4 }, { 8 }) -- confirm against the OrcStaticOpcode definition.
 * The final row with an empty name presumably terminates the table.
 */
OrcStaticOpcode opcodes[] = {
{ "invf", invf, NULL, ORC_STATIC_OPCODE_FLOAT, { 4 }, { 4 } },
{ "invsqrtf", invsqrtf, NULL, ORC_STATIC_OPCODE_FLOAT, { 4 }, { 4 } },
{ "addg", addg, NULL, ORC_STATIC_OPCODE_FLOAT, { 8 }, { 8, 8 } },
{ "subg", subg, NULL, ORC_STATIC_OPCODE_FLOAT, { 8 }, { 8, 8 } },
{ "mulg", mulg, NULL, ORC_STATIC_OPCODE_FLOAT, { 8 }, { 8, 8 } },
{ "divg", divg, NULL, ORC_STATIC_OPCODE_FLOAT, { 8 }, { 8, 8 } },
{ "invg", invg, NULL, ORC_STATIC_OPCODE_FLOAT, { 8 }, { 8 } },
{ "sqrtg", sqrtg, NULL, ORC_STATIC_OPCODE_FLOAT, { 8 }, { 8 } },
{ "maxg", maxg, NULL, ORC_STATIC_OPCODE_FLOAT, { 8 }, { 8, 8 } },
{ "ming", ming, NULL, ORC_STATIC_OPCODE_FLOAT, { 8 }, { 8, 8 } },
{ "invsqrtg", invsqrtg, NULL, ORC_STATIC_OPCODE_FLOAT, { 8 }, { 8 } },
{ "cmpeqg", cmpeqg, NULL, ORC_STATIC_OPCODE_FLOAT, { 8 }, { 8, 8 } },
{ "cmpltg", cmpltg, NULL, ORC_STATIC_OPCODE_FLOAT, { 8 }, { 8, 8 } },
{ "cmpleg", cmpleg, NULL, ORC_STATIC_OPCODE_FLOAT, { 8 }, { 8, 8 } },
{ "convgl", convgl, NULL, ORC_STATIC_OPCODE_FLOAT, { 4 }, { 8 } },
{ "convlg", convlg, NULL, ORC_STATIC_OPCODE_FLOAT, { 8 }, { 4 } },
{ "convgf", convgf, NULL, ORC_STATIC_OPCODE_FLOAT, { 4 }, { 8 } },
{ "convfg", convfg, NULL, ORC_STATIC_OPCODE_FLOAT, { 8 }, { 4 } },
{ "" }
};
/* Public header of the orc-float library. */
#ifndef _ORC_FLOAT_FLOAT_H_
#define _ORC_FLOAT_FLOAT_H_
#include <orc/orc.h>
#include <orc/orcutils.h>
ORC_BEGIN_DECLS
/* Initialise Orc, register the "float" opcode set, and register its SSE
 * and NEON rules. */
void orc_float_init (void);
ORC_END_DECLS
#endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment