Commit 8c97bb38 authored by Alyssa Rosenzweig 💜

I tried...

parent b3b8bc9f
......@@ -166,9 +166,6 @@ trans_texture_swizzle_bpp4_align16_neon1(int width, int height, int source_strid
int block_start_s = block_y * block_pitch * 256;
int source_start = y * source_stride;
uint32_t d_start = ((uintptr_t) ldest) + block_start_s;
uintptr_t psrc = (uintptr_t) (pixels + source_start);
uint32x4_t PSrc = vdupq_n_u32(psrc);
......@@ -192,8 +189,19 @@ trans_texture_swizzle_bpp4_align16_neon1(int width, int height, int source_strid
const uint32_t *src1 = (const uint32_t *) vgetq_lane_u32(U_Src, 0);
uint32_t *dst1 = (uint32_t *) vgetq_lane_u32(U_Dst, 0);
for (int j = 0; j < 16; ++j) {
dst1[space[j]] = src1[j];
for (int j = 0; j < 16; j += 4) {
uint32x4_t s = vld1q_u32(&src1[j]);
uint32x4_t p = vld1q_u32(&space[j]);
vst1q_lane_u32(dst1 + vgetq_lane_u32(p, 0), s, 0);
vst1q_lane_u32(dst1 + vgetq_lane_u32(p, 1), s, 1);
vst1q_lane_u32(dst1 + vgetq_lane_u32(p, 2), s, 2);
vst1q_lane_u32(dst1 + vgetq_lane_u32(p, 3), s, 3);
#if 0
dst1[vgetq_lane_u32(p, 0)] = vgetq_lane_u32(s, 0);
dst1[vgetq_lane_u32(p, 1)] = vgetq_lane_u32(s, 1);
dst1[vgetq_lane_u32(p, 2)] = vgetq_lane_u32(s, 2);
dst1[vgetq_lane_u32(p, 3)] = vgetq_lane_u32(s, 3);
#endif
}
const uint32_t *src2 = (const uint32_t *) vgetq_lane_u32(U_Src, 1);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment