Commit f1d8693e authored by Marek Vasut
Browse files

aarch32: Implement loadupdb instruction

Fill in the aarch32 opcodes for the loadupdb instruction, which is used
by various color space conversion programs. There is likely still
some room for optimization.
parent 5cf9420f
Pipeline #224498 failed
in 3 minutes and 59 seconds
...@@ -311,14 +311,12 @@ orc_compiler_neon_init (OrcCompiler *compiler) ...@@ -311,14 +311,12 @@ orc_compiler_neon_init (OrcCompiler *compiler)
compiler->unroll_shift = 0; compiler->unroll_shift = 0;
} }
if (compiler->is_64bit) { /* The loadupdb is aarch64 only so far */ for(i=0;i<compiler->n_insns;i++){
for(i=0;i<compiler->n_insns;i++){ OrcInstruction *insn = compiler->insns + i;
OrcInstruction *insn = compiler->insns + i; OrcStaticOpcode *opcode = insn->opcode;
OrcStaticOpcode *opcode = insn->opcode;
if (strcmp (opcode->name, "loadupdb") == 0) { if (strcmp (opcode->name, "loadupdb") == 0) {
compiler->vars[insn->src_args[0]].need_offset_reg = TRUE; compiler->vars[insn->src_args[0]].need_offset_reg = TRUE;
}
} }
} }
...@@ -355,29 +353,46 @@ orc_neon_load_constants_outer (OrcCompiler *compiler) ...@@ -355,29 +353,46 @@ orc_neon_load_constants_outer (OrcCompiler *compiler)
orc_compiler_emit_invariants (compiler); orc_compiler_emit_invariants (compiler);
if (compiler->is_64bit) { /* The loadupdb is aarch64 only so far */ for(i=0;i<compiler->n_insns;i++){
for(i=0;i<compiler->n_insns;i++){ OrcInstruction *insn = compiler->insns + i;
OrcInstruction *insn = compiler->insns + i; OrcStaticOpcode *opcode = insn->opcode;
OrcStaticOpcode *opcode = insn->opcode;
if (strcmp (opcode->name, "loadupdb") == 0) { if (strcmp (opcode->name, "loadupdb") == 0) {
if (compiler->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_PARAM) { if (compiler->vars[insn->src_args[1]].vartype == ORC_VAR_TYPE_PARAM) {
orc_arm64_emit_load_reg (compiler, 64, if (compiler->is_64bit) {
orc_arm64_emit_load_reg (compiler, 64,
compiler->vars[insn->src_args[0]].ptr_offset,
compiler->exec_reg,
ORC_STRUCT_OFFSET(OrcExecutor, params[insn->src_args[1]]));
} else {
orc_arm_emit_load_reg (compiler,
compiler->vars[insn->src_args[0]].ptr_offset, compiler->vars[insn->src_args[0]].ptr_offset,
compiler->exec_reg, compiler->exec_reg,
ORC_STRUCT_OFFSET(OrcExecutor, params[insn->src_args[1]])); ORC_STRUCT_OFFSET(OrcExecutor, params[insn->src_args[1]]));
}
} else {
if (!compiler->vars[insn->src_args[0]].ptr_offset)
continue;
if (compiler->is_64bit) {
if (!compiler->vars[insn->src_args[1]].value.i)
orc_arm64_emit_eor(compiler, 64,
compiler->vars[insn->src_args[0]].ptr_offset,
compiler->vars[insn->src_args[0]].ptr_offset,
compiler->vars[insn->src_args[0]].ptr_offset);
else
orc_arm64_emit_load_imm(compiler, 64,
compiler->vars[insn->src_args[0]].ptr_offset,
compiler->vars[insn->src_args[1]].value.i);
} else { } else {
if (!compiler->vars[insn->src_args[0]].ptr_offset) if (!compiler->vars[insn->src_args[1]].value.i)
continue; orc_arm_emit_eor_r(compiler, ORC_ARM_COND_AL, 0,
if (!compiler->vars[insn->src_args[1]].value.i) compiler->vars[insn->src_args[0]].ptr_offset,
orc_arm64_emit_eor(compiler, 64, compiler->vars[insn->src_args[0]].ptr_offset,
compiler->vars[insn->src_args[0]].ptr_offset, compiler->vars[insn->src_args[0]].ptr_offset);
compiler->vars[insn->src_args[0]].ptr_offset, else
compiler->vars[insn->src_args[0]].ptr_offset); orc_arm_emit_load_imm(compiler,
else compiler->vars[insn->src_args[0]].ptr_offset,
orc_arm64_emit_load_imm(compiler, 64, compiler->vars[insn->src_args[1]].value.i);
compiler->vars[insn->src_args[0]].ptr_offset,
compiler->vars[insn->src_args[1]].value.i);
} }
} }
} }
...@@ -411,6 +426,11 @@ orc_neon_load_constants_inner (OrcCompiler *compiler) ...@@ -411,6 +426,11 @@ orc_neon_load_constants_inner (OrcCompiler *compiler)
orc_arm_emit_load_reg (compiler, orc_arm_emit_load_reg (compiler,
compiler->vars[i].ptr_register, compiler->vars[i].ptr_register,
compiler->exec_reg, ORC_STRUCT_OFFSET(OrcExecutor, arrays[i])); compiler->exec_reg, ORC_STRUCT_OFFSET(OrcExecutor, arrays[i]));
if (compiler->vars[i].ptr_offset)
orc_arm_emit_eor_r(compiler, ORC_ARM_COND_AL, 0,
compiler->vars[i].ptr_offset,
compiler->vars[i].ptr_offset,
compiler->vars[i].ptr_offset);
} }
break; break;
case ORC_VAR_TYPE_ACCUMULATOR: case ORC_VAR_TYPE_ACCUMULATOR:
...@@ -1182,10 +1202,17 @@ orc_neon_emit_loop (OrcCompiler *compiler, int unroll_index) ...@@ -1182,10 +1202,17 @@ orc_neon_emit_loop (OrcCompiler *compiler, int unroll_index)
compiler->vars[k].size << compiler->loop_shift); compiler->vars[k].size << compiler->loop_shift);
} }
} else { } else {
if (compiler->vars[k].ptr_offset) {
orc_arm_emit_add_imm (compiler,
compiler->vars[k].ptr_offset,
compiler->vars[k].ptr_offset,
compiler->vars[k].size << compiler->loop_shift);
} else if (compiler->vars[k].ptr_register) {
orc_arm_emit_add_imm (compiler, orc_arm_emit_add_imm (compiler,
compiler->vars[k].ptr_register, compiler->vars[k].ptr_register,
compiler->vars[k].ptr_register, compiler->vars[k].ptr_register,
compiler->vars[k].size << compiler->loop_shift); compiler->vars[k].size << compiler->loop_shift);
}
} }
} }
} }
......
...@@ -1080,117 +1080,226 @@ static void ...@@ -1080,117 +1080,226 @@ static void
neon_rule_loadupdb (OrcCompiler *compiler, void *user, OrcInstruction *insn) neon_rule_loadupdb (OrcCompiler *compiler, void *user, OrcInstruction *insn)
{ {
OrcVariable *src = compiler->vars + insn->src_args[0]; OrcVariable *src = compiler->vars + insn->src_args[0];
OrcVariable *dest = compiler->vars + insn->dest_args[0];
unsigned int code = 0; unsigned int code = 0;
int size = src->size << compiler->insn_shift; int size = src->size << compiler->insn_shift;
ORC_ASSERT(src->ptr_register); /* can ptr_register be 0 ? */ ORC_ASSERT(src->ptr_register); /* can ptr_register be 0 ? */
int ptr_reg; int ptr_reg;
if (!compiler->is_64bit) {
ORC_COMPILER_ERROR(compiler, "loadupdb is implemented only on aarch64");
return;
}
/* FIXME this should be fixed at a higher level */ /* FIXME this should be fixed at a higher level */
if (src->vartype != ORC_VAR_TYPE_SRC && src->vartype != ORC_VAR_TYPE_DEST) { if (src->vartype != ORC_VAR_TYPE_SRC && src->vartype != ORC_VAR_TYPE_DEST) {
ORC_COMPILER_ERROR(compiler, "loadX used with non src/dest"); ORC_COMPILER_ERROR(compiler, "loadX used with non src/dest");
return; return;
} }
if (src->ptr_offset) { if (compiler->is_64bit) {
ptr_reg = compiler->gp_tmpreg; if (src->ptr_offset) {
orc_arm64_emit_add_lsr(compiler, 64, ptr_reg, src->ptr_register, src->ptr_offset, 1); ptr_reg = compiler->gp_tmpreg;
} else { orc_arm64_emit_add_lsr(compiler, 64, ptr_reg, src->ptr_register, src->ptr_offset, 1);
ptr_reg = src->ptr_register;
}
int opcode, flag;
if (size > 16) {
/** load multiple single-element structures to one, two, three, or four registers */
char vt_str[64];
memset(vt_str, '\x00', 64);
if (size == 64) {
snprintf(vt_str, 64, "%s, %s, %s, %s",
orc_neon64_reg_name_vector (compiler->tmpreg, 1, 1),
orc_neon64_reg_name_vector (compiler->tmpreg + 1, 1, 1),
orc_neon64_reg_name_vector (compiler->tmpreg + 2, 1, 1),
orc_neon64_reg_name_vector (compiler->tmpreg + 3, 1, 1));
opcode = 0x2;
} else if (size == 32) {
snprintf(vt_str, 64, "%s, %s",
orc_neon64_reg_name_vector (compiler->tmpreg, 1, 1),
orc_neon64_reg_name_vector (compiler->tmpreg + 1, 1, 1));
opcode = 0xa;
} else if (size == 16) {
snprintf(vt_str, 64, "%s",
orc_neon64_reg_name_vector (compiler->tmpreg, 1, 1));
opcode = 0x7;
} else { } else {
ORC_COMPILER_ERROR(compiler,"bad aligned load size %d", ptr_reg = src->ptr_register;
src->size << compiler->insn_shift);
return;
} }
flag = 0; /* Bytes */
int opcode, flag;
ORC_ASM_CODE(compiler," ld1 { %s }, [%s]\n",
vt_str, orc_arm64_reg_name (ptr_reg, 64)); if (size > 16) {
code = 0x0c400000; /** load multiple single-element structures to one, two, three, or four registers */
code |= 0 << 30; /* Q-bit */ char vt_str[64];
code |= (flag&0x3) << 10;
code |= (opcode&0xf) << 12; memset(vt_str, '\x00', 64);
} else {
/** load one single-element structure to one lane of one register */ if (size == 64) {
flag = 0; snprintf(vt_str, 64, "%s, %s, %s, %s",
if (size == 8) { orc_neon64_reg_name_vector (compiler->tmpreg, 1, 1),
opcode = 4; orc_neon64_reg_name_vector (compiler->tmpreg + 1, 1, 1),
flag = 1; /* size==01 */ orc_neon64_reg_name_vector (compiler->tmpreg + 2, 1, 1),
} else if (size == 4) { orc_neon64_reg_name_vector (compiler->tmpreg + 3, 1, 1));
opcode = 4; opcode = 0x2;
} else if (size == 2) { } else if (size == 32) {
opcode = 2; snprintf(vt_str, 64, "%s, %s",
} else if (size == 1) { orc_neon64_reg_name_vector (compiler->tmpreg, 1, 1),
opcode = 0; orc_neon64_reg_name_vector (compiler->tmpreg + 1, 1, 1));
opcode = 0xa;
} else if (size == 16) {
snprintf(vt_str, 64, "%s",
orc_neon64_reg_name_vector (compiler->tmpreg, 1, 1));
opcode = 0x7;
} else {
ORC_COMPILER_ERROR(compiler,"bad aligned load size %d",
src->size << compiler->insn_shift);
return;
}
flag = 0; /* Bytes */
ORC_ASM_CODE(compiler," ld1 { %s }, [%s]\n",
vt_str, orc_arm64_reg_name (ptr_reg, 64));
code = 0x0c400000;
code |= 0 << 30; /* Q-bit */
code |= (flag&0x3) << 10;
code |= (opcode&0xf) << 12;
} else { } else {
ORC_COMPILER_ERROR(compiler,"bad unaligned load size %d", /** load one single-element structure to one lane of one register */
src->size << compiler->insn_shift); flag = 0;
return; if (size == 8) {
opcode = 4;
flag = 1; /* size==01 */
} else if (size == 4) {
opcode = 4;
} else if (size == 2) {
opcode = 2;
} else if (size == 1) {
opcode = 0;
} else {
ORC_COMPILER_ERROR(compiler,"bad unaligned load size %d",
src->size << compiler->insn_shift);
return;
}
ORC_ASM_CODE(compiler," ld1 { %s }[0], [%s]\n",
orc_neon64_reg_name_vector_single (compiler->tmpreg, size),
orc_arm64_reg_name (ptr_reg, 64));
code = 0x0d400000;
code |= (opcode&0x7) << 13;
code |= (flag&0x3) << 10;
} }
ORC_ASM_CODE(compiler," ld1 { %s }[0], [%s]\n",
orc_neon64_reg_name_vector_single (compiler->tmpreg, size),
orc_arm64_reg_name (ptr_reg, 64));
code = 0x0d400000;
code |= (opcode&0x7) << 13;
code |= (flag&0x3) << 10;
}
code |= (ptr_reg&0x1f) << 5; code |= (ptr_reg&0x1f) << 5;
code |= (compiler->tmpreg&0x1f); code |= (compiler->tmpreg&0x1f);
orc_arm_emit (compiler, code); orc_arm_emit (compiler, code);
OrcVariable tmpreg = { .alloc = compiler->tmpreg, .size = compiler->vars[insn->src_args[0]].size }; OrcVariable tmpreg = { .alloc = compiler->tmpreg, .size = compiler->vars[insn->src_args[0]].size };
switch (src->size) {
case 1:
orc_neon64_emit_binary (compiler, "zip1", 0x0e003800,
compiler->vars[insn->dest_args[0]],
tmpreg,
tmpreg, compiler->insn_shift - 1);
break;
case 2:
orc_neon64_emit_binary (compiler, "zip1", 0x0e403800,
compiler->vars[insn->dest_args[0]],
tmpreg,
tmpreg, compiler->insn_shift - 1);
break;
case 4:
orc_neon64_emit_binary (compiler, "zip1", 0x0e803800,
compiler->vars[insn->dest_args[0]],
tmpreg,
tmpreg, compiler->insn_shift - 1);
break;
}
} else {
if (src->ptr_offset) {
ptr_reg = compiler->gp_tmpreg;
orc_arm_emit_add_rsi(compiler, ORC_ARM_COND_AL, 0,
ptr_reg, src->ptr_register,
src->ptr_offset, ORC_ARM_LSR, 1);
} else {
ptr_reg = src->ptr_register;
}
if (size > 8) {
if (src->is_aligned) {
if (size == 32) {
ORC_ASM_CODE(compiler," vld1.64 { %s, %s, %s, %s }, [%s,:256]\n",
orc_neon_reg_name (dest->alloc),
orc_neon_reg_name (dest->alloc + 1),
orc_neon_reg_name (dest->alloc + 2),
orc_neon_reg_name (dest->alloc + 3),
orc_arm_reg_name (ptr_reg));
code = 0xf42002dd;
} else if (size == 16) {
ORC_ASM_CODE(compiler," vld1.64 { %s, %s }, [%s,:128]\n",
orc_neon_reg_name (dest->alloc),
orc_neon_reg_name (dest->alloc + 1),
orc_arm_reg_name (ptr_reg));
code = 0xf4200aed;
} else if (size == 8) {
ORC_ASM_CODE(compiler," vld1.64 %s, [%s]\n",
orc_neon_reg_name (dest->alloc),
orc_arm_reg_name (ptr_reg));
code = 0xf42007cd;
} else {
ORC_COMPILER_ERROR(compiler,"bad aligned load size %d",
src->size << compiler->insn_shift);
}
} else {
if (size == 32) {
ORC_ASM_CODE(compiler," vld1.8 { %s, %s, %s, %s }, [%s]\n",
orc_neon_reg_name (dest->alloc),
orc_neon_reg_name (dest->alloc + 1),
orc_neon_reg_name (dest->alloc + 2),
orc_neon_reg_name (dest->alloc + 3),
orc_arm_reg_name (ptr_reg));
code = 0xf420020d;
} else if (size == 16) {
ORC_ASM_CODE(compiler," vld1.8 { %s, %s }, [%s]\n",
orc_neon_reg_name (dest->alloc),
orc_neon_reg_name (dest->alloc + 1),
orc_arm_reg_name (ptr_reg));
code = 0xf4200a0d;
} else if (size == 8) {
ORC_ASM_CODE(compiler," vld1.8 %s, [%s]\n",
orc_neon_reg_name (dest->alloc),
orc_arm_reg_name (ptr_reg));
code = 0xf420070d;
} else {
ORC_COMPILER_ERROR(compiler,"bad unaligned load size %d",
src->size << compiler->insn_shift);
}
}
} else {
int shift;
if (size == 4) {
shift = 2;
} else if (size == 2) {
shift = 1;
} else {
shift = 0;
}
ORC_ASM_CODE(compiler," vld1.%d %s[0], [%s]\n",
8<<shift,
orc_neon_reg_name (dest->alloc),
orc_arm_reg_name (ptr_reg));
code = 0xf4a0000d;
code |= shift<<10;
code |= (0&7)<<5;
}
code |= (ptr_reg&0xf) << 16;
code |= (dest->alloc&0xf) << 12;
code |= ((dest->alloc>>4)&0x1) << 22;
code |= 1 << 1;
orc_arm_emit (compiler, code);
switch (src->size) { switch (src->size) {
case 1: case 1:
orc_neon64_emit_binary (compiler, "zip1", 0x0e003800, orc_neon_emit_binary (compiler, "vorr", 0xf2200110,
compiler->vars[insn->dest_args[0]], compiler->vars[insn->dest_args[0]].alloc + 1,
tmpreg, compiler->vars[insn->dest_args[0]].alloc,
tmpreg, compiler->insn_shift - 1); compiler->vars[insn->dest_args[0]].alloc);
break; orc_neon_emit_unary (compiler, "vzip.8", 0xf3b20180,
case 2: compiler->vars[insn->dest_args[0]].alloc,
orc_neon64_emit_binary (compiler, "zip1", 0x0e403800, compiler->vars[insn->dest_args[0]].alloc + 1);
compiler->vars[insn->dest_args[0]], break;
tmpreg, case 2:
tmpreg, compiler->insn_shift - 1); orc_neon_emit_binary (compiler, "vorr", 0xf2200110,
break; compiler->vars[insn->dest_args[0]].alloc + 1,
case 4: compiler->vars[insn->dest_args[0]].alloc,
orc_neon64_emit_binary (compiler, "zip1", 0x0e803800, compiler->vars[insn->dest_args[0]].alloc);
compiler->vars[insn->dest_args[0]], orc_neon_emit_unary (compiler, "vzip.16", 0xf3b60180,
tmpreg, compiler->vars[insn->dest_args[0]].alloc,
tmpreg, compiler->insn_shift - 1); compiler->vars[insn->dest_args[0]].alloc + 1);
break; break;
case 4:
orc_neon_emit_binary (compiler, "vorr", 0xf2200110,
compiler->vars[insn->dest_args[0]].alloc + 1,
compiler->vars[insn->dest_args[0]].alloc,
compiler->vars[insn->dest_args[0]].alloc);
orc_neon_emit_unary_quad (compiler, "vzip.32", 0xf3ba0180,
compiler->vars[insn->dest_args[0]].alloc,
compiler->vars[insn->dest_args[0]].alloc + 1);
break;
}
} }
src->update_type = 1; src->update_type = 1;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment