Commit 637bc6f7 authored by Wim Taymans
Browse files

channelmix: add optimised NxM channelmix functions

parent 10f1d545
Pipeline #700588 passed with stages
in 3 minutes
......@@ -94,16 +94,16 @@ channelmix_f32_n_m_c(struct channelmix *mix, void * SPA_RESTRICT dst[],
clear_c(d[i], n_samples);
}
else {
for (n = 0; n < n_samples; n++) {
for (i = 0; i < n_dst; i++) {
for (i = 0; i < n_dst; i++) {
float *mi = mix->matrix[i], *di = d[i];
for (n = 0; n < n_samples; n++) {
float sum = 0.0f;
for (j = 0; j < n_src; j++)
sum += s[j][n] * mix->matrix[i][j];
d[i][n] = sum;
sum += s[j][n] * mi[j];
di[n] = sum;
}
}
for (i = 0; i < n_dst; i++)
lr4_process(&mix->lr4[i], d[i], d[i], 1.0f, n_samples);
}
}
}
......
......@@ -78,6 +78,50 @@ void channelmix_copy_sse(struct channelmix *mix, void * SPA_RESTRICT dst[],
vol_sse(d[i], s[i], mix->matrix[i][i], n_samples);
}
/*
 * Mix mix->src_chan source channels into mix->dst_chan destination
 * channels using SSE intrinsics.
 *
 * Sample n of destination channel i is the sum, over every source
 * channel j taken in increasing order, of src[j][n] * mix->matrix[i][j].
 * Once a destination channel has been filled, lr4_process() is run over
 * it in place with mix->lr4[i], passing 1.0f.
 *
 * Blocks of 8 samples are processed with aligned loads and stores, but
 * only when every source pointer and the current destination pointer
 * are 16-byte aligned. The remaining samples (or all of them when the
 * alignment test fails) go through the one-sample-at-a-time path.
 */
void
channelmix_f32_n_m_sse(struct channelmix *mix, void * SPA_RESTRICT dst[],
		   const void * SPA_RESTRICT src[], uint32_t n_samples)
{
	const float **in = (const float **) src;
	float **out = (float **) dst;
	uint32_t n_src = mix->src_chan, n_dst = mix->dst_chan;
	uint32_t ch_out, ch_in, pos, vec_end;
	__m128 coeff[n_src], acc_lo, acc_hi;
	bool src_aligned = true;

	/* The aligned path needs every source buffer on a 16-byte boundary. */
	for (ch_in = 0; ch_in < n_src; ch_in++)
		src_aligned &= SPA_IS_ALIGNED(in[ch_in], 16);

	for (ch_out = 0; ch_out < n_dst; ch_out++) {
		float *row = out[ch_out];

		/* Broadcast this destination's matrix row, one vector per source. */
		for (ch_in = 0; ch_in < n_src; ch_in++)
			coeff[ch_in] = _mm_set1_ps(mix->matrix[ch_out][ch_in]);

		/* Number of leading samples handled 8 at a time (0 if unaligned). */
		vec_end = (src_aligned && SPA_IS_ALIGNED(row, 16)) ? (n_samples & ~7) : 0;

		pos = 0;
		while (pos < vec_end) {
			acc_lo = _mm_setzero_ps();
			acc_hi = acc_lo;
			for (ch_in = 0; ch_in < n_src; ch_in++) {
				const float *chunk = &in[ch_in][pos];

				acc_lo = _mm_add_ps(acc_lo,
						_mm_mul_ps(_mm_load_ps(chunk), coeff[ch_in]));
				acc_hi = _mm_add_ps(acc_hi,
						_mm_mul_ps(_mm_load_ps(chunk + 4), coeff[ch_in]));
			}
			_mm_store_ps(row + pos, acc_lo);
			_mm_store_ps(row + pos + 4, acc_hi);
			pos += 8;
		}

		/* Leftover samples: same accumulation, lowest lane only. */
		while (pos < n_samples) {
			acc_lo = _mm_setzero_ps();
			for (ch_in = 0; ch_in < n_src; ch_in++)
				acc_lo = _mm_add_ss(acc_lo,
						_mm_mul_ss(_mm_load_ss(&in[ch_in][pos]), coeff[ch_in]));
			_mm_store_ss(&row[pos], acc_lo);
			pos++;
		}

		lr4_process(&mix->lr4[ch_out], row, row, 1.0f, n_samples);
	}
}
/* FL+FR+FC+LFE -> FL+FR */
void
channelmix_f32_3p1_2_sse(struct channelmix *mix, void * SPA_RESTRICT dst[],
......
......@@ -94,6 +94,9 @@ static const struct channelmix_info {
MAKE(8, MASK_7_1, 4, MASK_QUAD, channelmix_f32_7p1_4_c),
MAKE(8, MASK_7_1, 4, MASK_3_1, channelmix_f32_7p1_3p1_c),
#if defined (HAVE_SSE)
MAKE(ANY, 0, ANY, 0, channelmix_f32_n_m_sse),
#endif
MAKE(ANY, 0, ANY, 0, channelmix_f32_n_m_c),
};
#undef MAKE
......
......@@ -147,6 +147,7 @@ DEFINE_FUNCTION(f32_7p1_4, c);
#if defined (HAVE_SSE)
DEFINE_FUNCTION(copy, sse);
DEFINE_FUNCTION(f32_n_m, sse);
DEFINE_FUNCTION(f32_3p1_2, sse);
DEFINE_FUNCTION(f32_5p1_2, sse);
DEFINE_FUNCTION(f32_5p1_3p1, sse);
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment