Commit 5cf9420f authored by Marek Vasut's avatar Marek Vasut
Browse files

aarch64: Implement loadupdb instruction

Fill in the aarch64 opcodes for the loadupdb instruction, which is used
by various color space conversion programs. So far this is only
available on aarch64, but an arm32 port should be easy.
parent bf33f041
......@@ -440,6 +440,8 @@ ORC_API void orc_arm64_emit_ret (OrcCompiler *p, int Rn);
orc_arm64_emit_am(p,bits,ORC_ARM64_DP_ADD,ORC_ARM64_TYPE_REG,0,Rd,Rn,Rm,0)
/* ADD, register form, with the ORC_ARM_LSL shift type applied to Rm;
 * `val` is forwarded as the last argument of orc_arm64_emit_am()
 * (presumably the shift amount -- confirm against orc_arm64_emit_am). */
#define orc_arm64_emit_add_lsl(p,bits,Rd,Rn,Rm,val) \
orc_arm64_emit_am(p,bits,ORC_ARM64_DP_ADD,ORC_ARM64_TYPE_REG,ORC_ARM_LSL,Rd,Rn,Rm,val)
/* Same as orc_arm64_emit_add_lsl() but with the ORC_ARM_LSR shift type. */
#define orc_arm64_emit_add_lsr(p,bits,Rd,Rn,Rm,val) \
orc_arm64_emit_am(p,bits,ORC_ARM64_DP_ADD,ORC_ARM64_TYPE_REG,ORC_ARM_LSR,Rd,Rn,Rm,val)
/* Same as orc_arm64_emit_add_lsl() but with the ORC_ARM_ASR shift type. */
#define orc_arm64_emit_add_asr(p,bits,Rd,Rn,Rm,val) \
orc_arm64_emit_am(p,bits,ORC_ARM64_DP_ADD,ORC_ARM64_TYPE_REG,ORC_ARM_ASR,Rd,Rn,Rm,val)
#define orc_arm64_emit_add_ror(p,bits,Rd,Rn,Rm,val) \
......
......@@ -311,6 +311,17 @@ orc_compiler_neon_init (OrcCompiler *compiler)
compiler->unroll_shift = 0;
}
if (compiler->is_64bit) { /* The loadupdb is aarch64 only so far */
for(i=0;i<compiler->n_insns;i++){
OrcInstruction *insn = compiler->insns + i;
OrcStaticOpcode *opcode = insn->opcode;
if (strcmp (opcode->name, "loadupdb") == 0) {
compiler->vars[insn->src_args[0]].need_offset_reg = TRUE;
}
}
}
if (0) {
compiler->need_mask_regs = TRUE;
}
......@@ -343,6 +354,34 @@ orc_neon_load_constants_outer (OrcCompiler *compiler)
}
orc_compiler_emit_invariants (compiler);
if (compiler->is_64bit) { /* The loadupdb is aarch64 only so far */
for(i=0;i<compiler->n_insns;i++){
OrcInstruction *insn = compiler->insns + i;
OrcStaticOpcode *opcode = insn->opcode;
if (strcmp (opcode->name, "loadupdb") == 0) {
if (compiler->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_PARAM) {
orc_arm64_emit_load_reg (compiler, 64,
compiler->vars[insn->src_args[0]].ptr_offset,
compiler->exec_reg,
ORC_STRUCT_OFFSET(OrcExecutor, params[insn->src_args[1]]));
} else {
if (!compiler->vars[insn->src_args[0]].ptr_offset)
continue;
if (!compiler->vars[insn->src_args[1]].value.i)
orc_arm64_emit_eor(compiler, 64,
compiler->vars[insn->src_args[0]].ptr_offset,
compiler->vars[insn->src_args[0]].ptr_offset,
compiler->vars[insn->src_args[0]].ptr_offset);
else
orc_arm64_emit_load_imm(compiler, 64,
compiler->vars[insn->src_args[0]].ptr_offset,
compiler->vars[insn->src_args[1]].value.i);
}
}
}
}
}
static void
......@@ -363,6 +402,11 @@ orc_neon_load_constants_inner (OrcCompiler *compiler)
orc_arm64_emit_load_reg (compiler, 64,
compiler->vars[i].ptr_register,
compiler->exec_reg, ORC_STRUCT_OFFSET(OrcExecutor, arrays[i]));
if (compiler->vars[i].ptr_offset)
orc_arm64_emit_eor(compiler, 64,
compiler->vars[i].ptr_offset,
compiler->vars[i].ptr_offset,
compiler->vars[i].ptr_offset);
} else {
orc_arm_emit_load_reg (compiler,
compiler->vars[i].ptr_register,
......@@ -1125,22 +1169,23 @@ orc_neon_emit_loop (OrcCompiler *compiler, int unroll_index)
if (compiler->vars[k].name == NULL) continue;
if (compiler->vars[k].vartype == ORC_VAR_TYPE_SRC ||
compiler->vars[k].vartype == ORC_VAR_TYPE_DEST) {
if (compiler->vars[k].ptr_register) {
if (compiler->is_64bit)
if (compiler->is_64bit) {
if (compiler->vars[k].ptr_offset) {
orc_arm64_emit_add_imm (compiler, 64,
compiler->vars[k].ptr_offset,
compiler->vars[k].ptr_offset,
compiler->vars[k].size << compiler->loop_shift);
} else if (compiler->vars[k].ptr_register) {
orc_arm64_emit_add_imm (compiler, 64,
compiler->vars[k].ptr_register,
compiler->vars[k].ptr_register,
compiler->vars[k].size << compiler->loop_shift);
else
}
} else {
orc_arm_emit_add_imm (compiler,
compiler->vars[k].ptr_register,
compiler->vars[k].ptr_register,
compiler->vars[k].size << compiler->loop_shift);
} else {
/* arm_emit_add_imm_memoffset (compiler, arm_ptr_size, */
/* compiler->vars[k].size << compiler->loop_shift, */
/* (int)ORC_STRUCT_OFFSET(OrcExecutor, arrays[k]), */
/* p->exec_reg); */
}
}
}
......
......@@ -1076,6 +1076,126 @@ orc_neon_storeq (OrcCompiler *compiler, int dest, int update, int src1, int is_a
}
#endif
/*
 * neon_rule_loadupdb:
 * @compiler: compiler state; code is appended through orc_arm_emit()
 * @user: rule user data (unused by this rule)
 * @insn: the instruction being compiled; src_args[0] is the source array,
 *        dest_args[0] the destination variable
 *
 * Emits the AArch64 code for a "loadupdb" instruction: an LD1 from the
 * source pointer into compiler->tmpreg, followed by a ZIP1 of tmpreg with
 * itself into the destination variable.
 *
 * Only aarch64 is supported; on other targets, for a non src/dest source
 * variable, or for an unsupported size, a compiler error is raised and
 * nothing further is emitted.
 */
static void
neon_rule_loadupdb (OrcCompiler *compiler, void *user, OrcInstruction *insn)
{
  OrcVariable *src = compiler->vars + insn->src_args[0];
  unsigned int code = 0;
  unsigned int zip_code;
  int size = src->size << compiler->insn_shift;
  int ptr_reg;
  int opcode, flag;

  ORC_ASSERT(src->ptr_register); /* can ptr_register be 0 ? */

  if (!compiler->is_64bit) {
    ORC_COMPILER_ERROR(compiler, "loadupdb is implemented only on aarch64");
    return;
  }

  /* FIXME this should be fixed at a higher level */
  if (src->vartype != ORC_VAR_TYPE_SRC && src->vartype != ORC_VAR_TYPE_DEST) {
    ORC_COMPILER_ERROR(compiler, "loadX used with non src/dest");
    return;
  }

  /* Select the ZIP1 encoding for the element width up front, so that an
   * unsupported width is reported instead of silently emitting a load
   * whose result never reaches the destination. */
  switch (src->size) {
    case 1:
      zip_code = 0x0e003800;
      break;
    case 2:
      zip_code = 0x0e403800;
      break;
    case 4:
      zip_code = 0x0e803800;
      break;
    default:
      ORC_COMPILER_ERROR(compiler, "bad loadupdb element size %d", src->size);
      return;
  }

  if (src->ptr_offset) {
    /* When an offset register is allocated, the load address is formed in
     * the scratch GP register from ptr_register and ptr_offset using the
     * add-with-LSR helper and a shift value of 1. */
    ptr_reg = compiler->gp_tmpreg;
    orc_arm64_emit_add_lsr(compiler, 64, ptr_reg, src->ptr_register, src->ptr_offset, 1);
  } else {
    ptr_reg = src->ptr_register;
  }

  /* 16 bytes must take the multi-register path: with a strict '>' the
   * one-register case below was unreachable and a 16-byte load was
   * rejected by the lane path as a bad size. */
  if (size >= 16) {
    /** load multiple single-element structures to one, two, three, or four registers */
    char vt_str[64];

    memset(vt_str, '\x00', 64);

    /* NOTE(review): the register names are requested with a trailing
     * argument of 1 while the encoding below leaves the Q bit clear --
     * confirm the printed arrangement matches the emitted instruction. */
    if (size == 64) {
      snprintf(vt_str, 64, "%s, %s, %s, %s",
          orc_neon64_reg_name_vector (compiler->tmpreg, 1, 1),
          orc_neon64_reg_name_vector (compiler->tmpreg + 1, 1, 1),
          orc_neon64_reg_name_vector (compiler->tmpreg + 2, 1, 1),
          orc_neon64_reg_name_vector (compiler->tmpreg + 3, 1, 1));
      opcode = 0x2;
    } else if (size == 32) {
      snprintf(vt_str, 64, "%s, %s",
          orc_neon64_reg_name_vector (compiler->tmpreg, 1, 1),
          orc_neon64_reg_name_vector (compiler->tmpreg + 1, 1, 1));
      opcode = 0xa;
    } else if (size == 16) {
      snprintf(vt_str, 64, "%s",
          orc_neon64_reg_name_vector (compiler->tmpreg, 1, 1));
      opcode = 0x7;
    } else {
      ORC_COMPILER_ERROR(compiler,"bad aligned load size %d",
          src->size << compiler->insn_shift);
      return;
    }
    flag = 0; /* Bytes */

    ORC_ASM_CODE(compiler," ld1 { %s }, [%s]\n",
        vt_str, orc_arm64_reg_name (ptr_reg, 64));
    code = 0x0c400000;
    code |= 0 << 30; /* Q-bit */
    code |= (flag&0x3) << 10;
    code |= (opcode&0xf) << 12;
  } else {
    /** load one single-element structure to one lane of one register */
    flag = 0;
    if (size == 8) {
      opcode = 4;
      flag = 1; /* size==01 */
    } else if (size == 4) {
      opcode = 4;
    } else if (size == 2) {
      opcode = 2;
    } else if (size == 1) {
      opcode = 0;
    } else {
      ORC_COMPILER_ERROR(compiler,"bad unaligned load size %d",
          src->size << compiler->insn_shift);
      return;
    }

    ORC_ASM_CODE(compiler," ld1 { %s }[0], [%s]\n",
        orc_neon64_reg_name_vector_single (compiler->tmpreg, size),
        orc_arm64_reg_name (ptr_reg, 64));
    code = 0x0d400000;
    code |= (opcode&0x7) << 13;
    code |= (flag&0x3) << 10;
  }

  /* Base register and target register fields are shared by both forms. */
  code |= (ptr_reg&0x1f) << 5;
  code |= (compiler->tmpreg&0x1f);

  orc_arm_emit (compiler, code);

  /* ZIP1 of the loaded register with itself, written to the destination. */
  OrcVariable tmpreg = { .alloc = compiler->tmpreg, .size = src->size };

  orc_neon64_emit_binary (compiler, "zip1", zip_code,
      compiler->vars[insn->dest_args[0]],
      tmpreg,
      tmpreg, compiler->insn_shift - 1);

  src->update_type = 1;
}
static void
neon_rule_loadpX (OrcCompiler *compiler, void *user, OrcInstruction *insn)
{
......@@ -4388,6 +4508,7 @@ orc_compiler_neon_register_rules (OrcTarget *target)
orc_rule_register (rule_set, "loadpw", neon_rule_loadpX, (void *)2);
orc_rule_register (rule_set, "loadpl", neon_rule_loadpX, (void *)4);
orc_rule_register (rule_set, "loadpq", neon_rule_loadpX, (void *)8);
orc_rule_register (rule_set, "loadupdb", neon_rule_loadupdb, (void *)0);
orc_rule_register (rule_set, "loadb", neon_rule_loadX, (void *)0);
orc_rule_register (rule_set, "loadw", neon_rule_loadX, (void *)0);
orc_rule_register (rule_set, "loadl", neon_rule_loadX, (void *)0);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment