Commit e247e879 authored by David Schleef's avatar David Schleef

neon: Add quad load/store

parent ee9cc690
...@@ -175,16 +175,16 @@ orc_compiler_neon_init (OrcCompiler *compiler) ...@@ -175,16 +175,16 @@ orc_compiler_neon_init (OrcCompiler *compiler)
switch (orc_program_get_max_array_size (compiler->program)) { switch (orc_program_get_max_array_size (compiler->program)) {
case 1: case 1:
loop_shift = 3; loop_shift = 4;
break; break;
case 2: case 2:
loop_shift = 2; loop_shift = 3;
break; break;
case 4: case 4:
loop_shift = 1; loop_shift = 2;
break; break;
case 8: case 8:
loop_shift = 0; loop_shift = 1;
break; break;
default: default:
ORC_ERROR("unhandled max var size %d", ORC_ERROR("unhandled max var size %d",
......
...@@ -329,16 +329,57 @@ orc_neon_load_halfvec_unaligned (OrcCompiler *compiler, OrcVariable *var, ...@@ -329,16 +329,57 @@ orc_neon_load_halfvec_unaligned (OrcCompiler *compiler, OrcVariable *var,
#endif #endif
} }
void
orc_neon_load_twovec_aligned (OrcCompiler *compiler, OrcVariable *var, int update)
{
orc_uint32 code;
ORC_ASM_CODE(compiler," vld1.64 { %s, %s }, [%s,:128]%s\n",
orc_neon_reg_name (var->alloc),
orc_neon_reg_name (var->alloc + 1),
orc_arm_reg_name (var->ptr_register),
update ? "!" : "");
code = 0xf4200acd;
code |= (var->ptr_register&0xf) << 16;
code |= (var->alloc&0xf) << 12;
code |= ((var->alloc>>4)&0x1) << 22;
code |= (!update) << 1;
orc_arm_emit (compiler, code);
}
void
orc_neon_load_twovec_unaligned (OrcCompiler *compiler, OrcVariable *var,
int update)
{
orc_uint32 code;
ORC_ASM_CODE(compiler," vld1.8 { %s, %s }, [%s]%s\n",
orc_neon_reg_name (var->alloc),
orc_neon_reg_name (var->alloc + 1),
orc_arm_reg_name (var->ptr_register),
update ? "!" : "");
code = 0xf4200a0d;
code |= (var->ptr_register&0xf) << 16;
code |= ((var->alloc)&0xf) << 12;
code |= (((var->alloc)>>4)&0x1) << 22;
code |= (!update) << 1;
orc_arm_emit (compiler, code);
}
void void
orc_neon_loadb (OrcCompiler *compiler, OrcVariable *var, int update) orc_neon_loadb (OrcCompiler *compiler, OrcVariable *var, int update)
{ {
orc_uint32 code; orc_uint32 code;
int i; int i;
if (var->is_aligned && compiler->loop_shift == 3) { if (var->is_aligned && compiler->loop_shift == 4) {
orc_neon_load_twovec_aligned (compiler, var, update);
} else if (var->is_aligned && compiler->loop_shift == 3) {
orc_neon_load_vec_aligned (compiler, var, update); orc_neon_load_vec_aligned (compiler, var, update);
} else if (var->is_aligned && compiler->loop_shift == 2) { } else if (var->is_aligned && compiler->loop_shift == 2) {
orc_neon_load_halfvec_aligned (compiler, var, update); orc_neon_load_halfvec_aligned (compiler, var, update);
} else if (compiler->loop_shift == 4) {
orc_neon_load_twovec_unaligned (compiler, var, update);
} else if (compiler->loop_shift == 3) { } else if (compiler->loop_shift == 3) {
orc_neon_load_vec_unaligned (compiler, var, update); orc_neon_load_vec_unaligned (compiler, var, update);
} else if (compiler->loop_shift == 2) { } else if (compiler->loop_shift == 2) {
...@@ -364,12 +405,18 @@ orc_neon_loadb (OrcCompiler *compiler, OrcVariable *var, int update) ...@@ -364,12 +405,18 @@ orc_neon_loadb (OrcCompiler *compiler, OrcVariable *var, int update)
void void
orc_neon_loadw (OrcCompiler *compiler, OrcVariable *var, int update) orc_neon_loadw (OrcCompiler *compiler, OrcVariable *var, int update)
{ {
if (var->is_aligned && compiler->loop_shift == 2) { if (var->is_aligned && compiler->loop_shift == 3) {
orc_neon_load_vec_aligned (compiler, var, update);
} else if (var->is_aligned && compiler->loop_shift == 2) {
orc_neon_load_vec_aligned (compiler, var, update); orc_neon_load_vec_aligned (compiler, var, update);
} else if (var->is_aligned && compiler->loop_shift == 1) { } else if (var->is_aligned && compiler->loop_shift == 1) {
orc_neon_load_halfvec_aligned (compiler, var, update); orc_neon_load_halfvec_aligned (compiler, var, update);
} else if (compiler->loop_shift == 2 && var->mask_alloc) { } else if (compiler->loop_shift == 3) {
orc_neon_load_twovec_unaligned (compiler, var, update);
} else if (compiler->loop_shift == 2) {
orc_neon_load_vec_unaligned (compiler, var, update); orc_neon_load_vec_unaligned (compiler, var, update);
} else if (compiler->loop_shift == 1) {
orc_neon_load_halfvec_unaligned (compiler, var, update);
} else { } else {
orc_uint32 code; orc_uint32 code;
int i; int i;
...@@ -404,8 +451,14 @@ orc_neon_loadl (OrcCompiler *compiler, OrcVariable *var, int update) ...@@ -404,8 +451,14 @@ orc_neon_loadl (OrcCompiler *compiler, OrcVariable *var, int update)
orc_uint32 code; orc_uint32 code;
int i; int i;
if (var->is_aligned && compiler->loop_shift == 1) { if (var->is_aligned && compiler->loop_shift == 2) {
orc_neon_load_twovec_aligned (compiler, var, update);
} else if (var->is_aligned && compiler->loop_shift == 1) {
orc_neon_load_vec_aligned (compiler, var, update); orc_neon_load_vec_aligned (compiler, var, update);
} else if (compiler->loop_shift == 2) {
orc_neon_load_twovec_unaligned (compiler, var, update);
} else if (compiler->loop_shift == 1) {
orc_neon_load_vec_unaligned (compiler, var, update);
} else { } else {
if (compiler->loop_shift > 0) { if (compiler->loop_shift > 0) {
//ORC_ERROR("slow load"); //ORC_ERROR("slow load");
...@@ -448,8 +501,20 @@ orc_neon_storeb (OrcCompiler *compiler, int dest, int update, int src1, int is_a ...@@ -448,8 +501,20 @@ orc_neon_storeb (OrcCompiler *compiler, int dest, int update, int src1, int is_a
orc_uint32 code; orc_uint32 code;
int i; int i;
if (is_aligned && compiler->loop_shift == 3) { if (is_aligned && compiler->loop_shift == 4) {
ORC_ASM_CODE(compiler," vst1.8 %s, [%s@64]%s\n", ORC_ASM_CODE(compiler," vst1.8 { %s, %s }, [%s,:128]%s\n",
orc_neon_reg_name (src1),
orc_neon_reg_name (src1+1),
orc_arm_reg_name (dest),
update ? "!" : "");
code = 0xf4000a2d;
code |= (dest&0xf) << 16;
code |= (src1&0xf) << 12;
code |= ((src1>>4)&0x1) << 22;
code |= (!update) << 1;
orc_arm_emit (compiler, code);
} else if (is_aligned && compiler->loop_shift == 3) {
ORC_ASM_CODE(compiler," vst1.8 %s, [%s,:64]%s\n",
orc_neon_reg_name (src1), orc_neon_reg_name (src1),
orc_arm_reg_name (dest), orc_arm_reg_name (dest),
update ? "!" : ""); update ? "!" : "");
...@@ -482,8 +547,20 @@ orc_neon_storew (OrcCompiler *compiler, int dest, int update, int src1, int is_a ...@@ -482,8 +547,20 @@ orc_neon_storew (OrcCompiler *compiler, int dest, int update, int src1, int is_a
orc_uint32 code; orc_uint32 code;
int i; int i;
if (is_aligned && compiler->loop_shift == 2) { if (is_aligned && compiler->loop_shift == 3) {
ORC_ASM_CODE(compiler," vst1.16 %s, [%s@64]%s\n", ORC_ASM_CODE(compiler," vst1.16 { %s, %s }, [%s,:128]%s\n",
orc_neon_reg_name (src1),
orc_neon_reg_name (src1 + 1),
orc_arm_reg_name (dest),
update ? "!" : "");
code = 0xf4000a6d;
code |= (dest&0xf) << 16;
code |= (src1&0xf) << 12;
code |= ((src1>>4)&0x1) << 22;
code |= (!update) << 1;
orc_arm_emit (compiler, code);
} else if (is_aligned && compiler->loop_shift == 2) {
ORC_ASM_CODE(compiler," vst1.16 %s, [%s,:64]%s\n",
orc_neon_reg_name (src1), orc_neon_reg_name (src1),
orc_arm_reg_name (dest), orc_arm_reg_name (dest),
update ? "!" : ""); update ? "!" : "");
...@@ -516,8 +593,20 @@ orc_neon_storel (OrcCompiler *compiler, int dest, int update, int src1, int is_a ...@@ -516,8 +593,20 @@ orc_neon_storel (OrcCompiler *compiler, int dest, int update, int src1, int is_a
orc_uint32 code; orc_uint32 code;
int i; int i;
if (is_aligned && compiler->loop_shift == 2) { if (is_aligned && compiler->loop_shift == 3) {
ORC_ASM_CODE(compiler," vst1.32 %s, [%s@64]%s\n", ORC_ASM_CODE(compiler," vst1.32 { %s, %s }, [%s,:128]%s\n",
orc_neon_reg_name (src1),
orc_neon_reg_name (src1 + 1),
orc_arm_reg_name (dest),
update ? "!" : "");
code = 0xf4000a9d;
code |= (dest&0xf) << 16;
code |= (src1&0xf) << 12;
code |= ((src1>>4)&0x1) << 22;
code |= (!update) << 1;
orc_arm_emit (compiler, code);
} else if (is_aligned && compiler->loop_shift == 2) {
ORC_ASM_CODE(compiler," vst1.32 %s, [%s,:64]%s\n",
orc_neon_reg_name (src1), orc_neon_reg_name (src1),
orc_arm_reg_name (dest), orc_arm_reg_name (dest),
update ? "!" : ""); update ? "!" : "");
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment