Commit f1d8693e authored by Marek Vasut
Browse files

aarch32: Implement loadupdb instruction

Fill in aarch32 opcodes for loadupdb instruction, which is used
by various color space conversion programs. There is likely still
some room for optimization.
parent 5cf9420f
Pipeline #224498 failed with stage
in 3 minutes and 59 seconds
......@@ -311,7 +311,6 @@ orc_compiler_neon_init (OrcCompiler *compiler)
compiler->unroll_shift = 0;
}
if (compiler->is_64bit) { /* The loadupdb is aarch64 only so far */
for(i=0;i<compiler->n_insns;i++){
OrcInstruction *insn = compiler->insns + i;
OrcStaticOpcode *opcode = insn->opcode;
......@@ -320,7 +319,6 @@ orc_compiler_neon_init (OrcCompiler *compiler)
compiler->vars[insn->src_args[0]].need_offset_reg = TRUE;
}
}
}
if (0) {
compiler->need_mask_regs = TRUE;
......@@ -355,20 +353,27 @@ orc_neon_load_constants_outer (OrcCompiler *compiler)
orc_compiler_emit_invariants (compiler);
if (compiler->is_64bit) { /* The loadupdb is aarch64 only so far */
for(i=0;i<compiler->n_insns;i++){
OrcInstruction *insn = compiler->insns + i;
OrcStaticOpcode *opcode = insn->opcode;
if (strcmp (opcode->name, "loadupdb") == 0) {
if (compiler->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_PARAM) {
if (compiler->is_64bit) {
orc_arm64_emit_load_reg (compiler, 64,
compiler->vars[insn->src_args[0]].ptr_offset,
compiler->exec_reg,
ORC_STRUCT_OFFSET(OrcExecutor, params[insn->src_args[1]]));
} else {
orc_arm_emit_load_reg (compiler,
compiler->vars[insn->src_args[0]].ptr_offset,
compiler->exec_reg,
ORC_STRUCT_OFFSET(OrcExecutor, params[insn->src_args[1]]));
}
} else {
if (!compiler->vars[insn->src_args[0]].ptr_offset)
continue;
if (compiler->is_64bit) {
if (!compiler->vars[insn->src_args[1]].value.i)
orc_arm64_emit_eor(compiler, 64,
compiler->vars[insn->src_args[0]].ptr_offset,
......@@ -378,6 +383,16 @@ orc_neon_load_constants_outer (OrcCompiler *compiler)
orc_arm64_emit_load_imm(compiler, 64,
compiler->vars[insn->src_args[0]].ptr_offset,
compiler->vars[insn->src_args[1]].value.i);
} else {
if (!compiler->vars[insn->src_args[1]].value.i)
orc_arm_emit_eor_r(compiler, ORC_ARM_COND_AL, 0,
compiler->vars[insn->src_args[0]].ptr_offset,
compiler->vars[insn->src_args[0]].ptr_offset,
compiler->vars[insn->src_args[0]].ptr_offset);
else
orc_arm_emit_load_imm(compiler,
compiler->vars[insn->src_args[0]].ptr_offset,
compiler->vars[insn->src_args[1]].value.i);
}
}
}
......@@ -411,6 +426,11 @@ orc_neon_load_constants_inner (OrcCompiler *compiler)
orc_arm_emit_load_reg (compiler,
compiler->vars[i].ptr_register,
compiler->exec_reg, ORC_STRUCT_OFFSET(OrcExecutor, arrays[i]));
if (compiler->vars[i].ptr_offset)
orc_arm_emit_eor_r(compiler, ORC_ARM_COND_AL, 0,
compiler->vars[i].ptr_offset,
compiler->vars[i].ptr_offset,
compiler->vars[i].ptr_offset);
}
break;
case ORC_VAR_TYPE_ACCUMULATOR:
......@@ -1182,6 +1202,12 @@ orc_neon_emit_loop (OrcCompiler *compiler, int unroll_index)
compiler->vars[k].size << compiler->loop_shift);
}
} else {
if (compiler->vars[k].ptr_offset) {
orc_arm_emit_add_imm (compiler,
compiler->vars[k].ptr_offset,
compiler->vars[k].ptr_offset,
compiler->vars[k].size << compiler->loop_shift);
} else if (compiler->vars[k].ptr_register) {
orc_arm_emit_add_imm (compiler,
compiler->vars[k].ptr_register,
compiler->vars[k].ptr_register,
......@@ -1189,6 +1215,7 @@ orc_neon_emit_loop (OrcCompiler *compiler, int unroll_index)
}
}
}
}
}
#define NEON_BINARY(code,a,b,c) \
......
......@@ -1080,22 +1080,19 @@ static void
neon_rule_loadupdb (OrcCompiler *compiler, void *user, OrcInstruction *insn)
{
OrcVariable *src = compiler->vars + insn->src_args[0];
OrcVariable *dest = compiler->vars + insn->dest_args[0];
unsigned int code = 0;
int size = src->size << compiler->insn_shift;
ORC_ASSERT(src->ptr_register); /* can ptr_register be 0 ? */
int ptr_reg;
if (!compiler->is_64bit) {
ORC_COMPILER_ERROR(compiler, "loadupdb is implemented only on aarch64");
return;
}
/* FIXME this should be fixed at a higher level */
if (src->vartype != ORC_VAR_TYPE_SRC && src->vartype != ORC_VAR_TYPE_DEST) {
ORC_COMPILER_ERROR(compiler, "loadX used with non src/dest");
return;
}
if (compiler->is_64bit) {
if (src->ptr_offset) {
ptr_reg = compiler->gp_tmpreg;
orc_arm64_emit_add_lsr(compiler, 64, ptr_reg, src->ptr_register, src->ptr_offset, 1);
......@@ -1192,6 +1189,118 @@ neon_rule_loadupdb (OrcCompiler *compiler, void *user, OrcInstruction *insn)
tmpreg, compiler->insn_shift - 1);
break;
}
} else {
if (src->ptr_offset) {
ptr_reg = compiler->gp_tmpreg;
orc_arm_emit_add_rsi(compiler, ORC_ARM_COND_AL, 0,
ptr_reg, src->ptr_register,
src->ptr_offset, ORC_ARM_LSR, 1);
} else {
ptr_reg = src->ptr_register;
}
if (size > 8) {
if (src->is_aligned) {
if (size == 32) {
ORC_ASM_CODE(compiler," vld1.64 { %s, %s, %s, %s }, [%s,:256]\n",
orc_neon_reg_name (dest->alloc),
orc_neon_reg_name (dest->alloc + 1),
orc_neon_reg_name (dest->alloc + 2),
orc_neon_reg_name (dest->alloc + 3),
orc_arm_reg_name (ptr_reg));
code = 0xf42002dd;
} else if (size == 16) {
ORC_ASM_CODE(compiler," vld1.64 { %s, %s }, [%s,:128]\n",
orc_neon_reg_name (dest->alloc),
orc_neon_reg_name (dest->alloc + 1),
orc_arm_reg_name (ptr_reg));
code = 0xf4200aed;
} else if (size == 8) {
ORC_ASM_CODE(compiler," vld1.64 %s, [%s]\n",
orc_neon_reg_name (dest->alloc),
orc_arm_reg_name (ptr_reg));
code = 0xf42007cd;
} else {
ORC_COMPILER_ERROR(compiler,"bad aligned load size %d",
src->size << compiler->insn_shift);
}
} else {
if (size == 32) {
ORC_ASM_CODE(compiler," vld1.8 { %s, %s, %s, %s }, [%s]\n",
orc_neon_reg_name (dest->alloc),
orc_neon_reg_name (dest->alloc + 1),
orc_neon_reg_name (dest->alloc + 2),
orc_neon_reg_name (dest->alloc + 3),
orc_arm_reg_name (ptr_reg));
code = 0xf420020d;
} else if (size == 16) {
ORC_ASM_CODE(compiler," vld1.8 { %s, %s }, [%s]\n",
orc_neon_reg_name (dest->alloc),
orc_neon_reg_name (dest->alloc + 1),
orc_arm_reg_name (ptr_reg));
code = 0xf4200a0d;
} else if (size == 8) {
ORC_ASM_CODE(compiler," vld1.8 %s, [%s]\n",
orc_neon_reg_name (dest->alloc),
orc_arm_reg_name (ptr_reg));
code = 0xf420070d;
} else {
ORC_COMPILER_ERROR(compiler,"bad unaligned load size %d",
src->size << compiler->insn_shift);
}
}
} else {
int shift;
if (size == 4) {
shift = 2;
} else if (size == 2) {
shift = 1;
} else {
shift = 0;
}
ORC_ASM_CODE(compiler," vld1.%d %s[0], [%s]\n",
8<<shift,
orc_neon_reg_name (dest->alloc),
orc_arm_reg_name (ptr_reg));
code = 0xf4a0000d;
code |= shift<<10;
code |= (0&7)<<5;
}
code |= (ptr_reg&0xf) << 16;
code |= (dest->alloc&0xf) << 12;
code |= ((dest->alloc>>4)&0x1) << 22;
code |= 1 << 1;
orc_arm_emit (compiler, code);
switch (src->size) {
case 1:
orc_neon_emit_binary (compiler, "vorr", 0xf2200110,
compiler->vars[insn->dest_args[0]].alloc + 1,
compiler->vars[insn->dest_args[0]].alloc,
compiler->vars[insn->dest_args[0]].alloc);
orc_neon_emit_unary (compiler, "vzip.8", 0xf3b20180,
compiler->vars[insn->dest_args[0]].alloc,
compiler->vars[insn->dest_args[0]].alloc + 1);
break;
case 2:
orc_neon_emit_binary (compiler, "vorr", 0xf2200110,
compiler->vars[insn->dest_args[0]].alloc + 1,
compiler->vars[insn->dest_args[0]].alloc,
compiler->vars[insn->dest_args[0]].alloc);
orc_neon_emit_unary (compiler, "vzip.16", 0xf3b60180,
compiler->vars[insn->dest_args[0]].alloc,
compiler->vars[insn->dest_args[0]].alloc + 1);
break;
case 4:
orc_neon_emit_binary (compiler, "vorr", 0xf2200110,
compiler->vars[insn->dest_args[0]].alloc + 1,
compiler->vars[insn->dest_args[0]].alloc,
compiler->vars[insn->dest_args[0]].alloc);
orc_neon_emit_unary_quad (compiler, "vzip.32", 0xf3ba0180,
compiler->vars[insn->dest_args[0]].alloc,
compiler->vars[insn->dest_args[0]].alloc + 1);
break;
}
}
src->update_type = 1;
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment