Commit 5945cb5f authored by Alyssa Rosenzweig 💜

Fake NEON setup

parent 65e88b18
......@@ -166,26 +166,29 @@ trans_texture_swizzle_bpp4_align16_neon1(int width, int height, int source_strid
int block_start_s = block_y * block_pitch * 256;
int source_start = y * source_stride;
uint32_t p_start = ((uintptr_t) pixels) + source_start;
uint32x2_t P_Start = vdup_n_u32(p_start);
uint32_t d_start = ((uintptr_t) ldest) + block_start_s;
uint32x2_t D_Start = vdup_n_u32(d_start);
uintptr_t psrc = (uintptr_t) (pixels + source_start);
uint32x2_t PSrc = vdup_n_u32(psrc);
uintptr_t pdst = (uintptr_t) (ldest + block_start_s);
uint32x2_t PDst = vdup_n_u32(pdst);
uint32_t *space = &space_filler[rem_y][0];
for (uint32_t x = 0; x < width; x += 16) {
int block_x_s = x << 4;
uint32x2_t X = vcreate_u32((uint64_t) x | ((((uint64_t) (x + 16)) << 32)));
uint32x2_t Block_x_s = vshl_n_u32(X, 6);
uint32x2_t X4 = vshl_n_u32(X, 2);
uintptr_t u_src = psrc + x*4;
uintptr_t u_dst = pdst + block_x_s*4;
uint32x2_t U_Src = vadd_u32(PSrc, X4);
uint32x2_t U_Dst = vadd_u32(PDst, Block_x_s);
for (int j = 0; j < 16; ++j) {
const uint32_t *src = (const uint32_t *) u_src;
uint32_t *dst = (uint32_t *) u_dst;
const uint32_t *src = (const uint32_t *) vget_lane_u32(U_Src, 0);
uint32_t *dst = (uint32_t *) vget_lane_u32(U_Dst, 0);
dst[space[j]] = src[j];
}
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment