GitLab will be down for maintenance this Sunday, 13th June, from approximately 7-11am UTC. This is for a PostgreSQL migration. See the tracker issue for more information.

Commit 4be29c11 authored by Wim Taymans
Browse files

resample: load more data on aarch64 because we can

parent ecead2b7
...@@ -43,10 +43,8 @@ static void inner_product_neon(float *d, const float * SPA_RESTRICT s, ...@@ -43,10 +43,8 @@ static void inner_product_neon(float *d, const float * SPA_RESTRICT s,
" bne 4f\n" " bne 4f\n"
" b 5f\n" " b 5f\n"
"1:" "1:"
" ld1 {v4.4s, v5.4s}, [%[taps]], #32\n" " ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [%[taps]], #64\n"
" ld1 {v8.4s, v9.4s}, [%[s]], #32\n" " ld1 {v8.4s, v9.4s, v10.4s, v11.4s}, [%[s]], #64\n"
" ld1 {v6.4s, v7.4s}, [%[taps]], #32\n"
" ld1 {v10.4s, v11.4s}, [%[s]], #32\n"
" subs %[n_taps], %[n_taps], #16\n" " subs %[n_taps], %[n_taps], #16\n"
" fmul v0.4s, v4.4s, v8.4s\n" " fmul v0.4s, v4.4s, v8.4s\n"
" fmul v1.4s, v5.4s, v9.4s\n" " fmul v1.4s, v5.4s, v9.4s\n"
...@@ -54,10 +52,8 @@ static void inner_product_neon(float *d, const float * SPA_RESTRICT s, ...@@ -54,10 +52,8 @@ static void inner_product_neon(float *d, const float * SPA_RESTRICT s,
" fmul v3.4s, v7.4s, v11.4s\n" " fmul v3.4s, v7.4s, v11.4s\n"
" beq 3f\n" " beq 3f\n"
"2:" "2:"
" ld1 { v4.4s, v5.4s}, [%[taps]], #32\n" " ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [%[taps]], #64\n"
" ld1 { v8.4s, v9.4s}, [%[s]], #32\n" " ld1 {v8.4s, v9.4s, v10.4s, v11.4s}, [%[s]], #64\n"
" ld1 { v6.4s, v7.4s}, [%[taps]], #32\n"
" ld1 { v10.4s, v11.4s}, [%[s]], #32\n"
" subs %[n_taps], %[n_taps], #16\n" " subs %[n_taps], %[n_taps], #16\n"
" fmla v0.4s, v4.4s, v8.4s\n" " fmla v0.4s, v4.4s, v8.4s\n"
" fmla v1.4s, v5.4s, v9.4s\n" " fmla v1.4s, v5.4s, v9.4s\n"
...@@ -71,8 +67,8 @@ static void inner_product_neon(float *d, const float * SPA_RESTRICT s, ...@@ -71,8 +67,8 @@ static void inner_product_neon(float *d, const float * SPA_RESTRICT s,
" fadd v0.4s, v4.4s, v5.4s\n" " fadd v0.4s, v4.4s, v5.4s\n"
" beq 5f\n" " beq 5f\n"
"4:" "4:"
" ld1 { v6.4s }, [%[taps]], #16\n" " ld1 {v6.4s}, [%[taps]], #16\n"
" ld1 { v10.4s }, [%[s]], #16\n" " ld1 {v10.4s}, [%[s]], #16\n"
" subs %[remainder], %[remainder], #4\n" " subs %[remainder], %[remainder], #4\n"
" fmla v0.4s, v6.4s, v10.4s\n" " fmla v0.4s, v6.4s, v10.4s\n"
" bne 4b\n" " bne 4b\n"
...@@ -158,9 +154,9 @@ static void inner_product_ip_neon(float *d, const float * SPA_RESTRICT s, ...@@ -158,9 +154,9 @@ static void inner_product_ip_neon(float *d, const float * SPA_RESTRICT s,
" fmul v3.4s, v7.4s, v9.4s\n" " fmul v3.4s, v7.4s, v9.4s\n"
" beq 3f\n" " beq 3f\n"
"2:" "2:"
" ld1 { v4.4s, v5.4s}, [%[t0]], #32\n" " ld1 {v4.4s, v5.4s}, [%[t0]], #32\n"
" ld1 { v8.4s, v9.4s}, [%[s]], #32\n" " ld1 {v8.4s, v9.4s}, [%[s]], #32\n"
" ld1 { v6.4s, v7.4s}, [%[t1]], #32\n" " ld1 {v6.4s, v7.4s}, [%[t1]], #32\n"
" subs %[n_taps], %[n_taps], #8\n" " subs %[n_taps], %[n_taps], #8\n"
" fmla v0.4s, v4.4s, v8.4s\n" " fmla v0.4s, v4.4s, v8.4s\n"
" fmla v1.4s, v5.4s, v9.4s\n" " fmla v1.4s, v5.4s, v9.4s\n"
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment