Commit 4b5f7833, authored and committed by Arun Raghavan

audioresample: Separate out CFLAGS used for SSE* code

This makes sure that only the files that need explicit SIMD support are built
with the relevant CFLAGS. The rest of the code is built without those flags,
and the SSE*-specific code is only called after runtime checks for CPU
features.

https://bugzilla.gnome.org/show_bug.cgi?id=729276
parent f4cba790
......@@ -179,6 +179,30 @@ dnl check for GCC specific SSE headers
dnl these are used by the speex resampler code
AC_CHECK_HEADERS([xmmintrin.h emmintrin.h smmintrin.h])
dnl also check which architecture we're on for building files with intrinsics
dnl separately
dnl HAVE_X86 is set to 1 when either x86 predefined macro is declared
AC_CHECK_DECLS([__i386__], [HAVE_X86=1])
AC_CHECK_DECLS([__x86_64__], [HAVE_X86=1])
dnl check for -m* compiler flags too
dnl one flag variable per instruction set, so each can be applied to just the
dnl sources that need it
SSE_CFLAGS="-msse"
SSE2_CFLAGS="-msse2"
SSE41_CFLAGS="-msse4.1"
dnl each HAVE_SSE* shell variable ends up as 1 or 0 depending on which
dnl AS_COMPILER_FLAG action runs for the corresponding flag
AS_COMPILER_FLAG([$SSE_CFLAGS], [HAVE_SSE=1], [HAVE_SSE=0])
AS_COMPILER_FLAG([$SSE2_CFLAGS], [HAVE_SSE2=1], [HAVE_SSE2=0])
AS_COMPILER_FLAG([$SSE41_CFLAGS], [HAVE_SSE41=1], [HAVE_SSE41=0])
dnl Makefile.am uses HAVE_X86 to decide whether the SSE* helper libraries
dnl are built at all
AM_CONDITIONAL(HAVE_X86, [test "x${HAVE_X86}" = "x1"])
dnl NOTE: these are defined in both the supported and unsupported case
dnl (to 1 or 0), so C code has to test their value with #if, not #ifdef
AC_DEFINE_UNQUOTED(HAVE_SSE, [$HAVE_SSE], [SSE support is enabled])
AC_DEFINE_UNQUOTED(HAVE_SSE2, [$HAVE_SSE2], [SSE2 support is enabled])
AC_DEFINE_UNQUOTED(HAVE_SSE41, [$HAVE_SSE41], [SSE4.1 support is enabled])
dnl export the flag strings so Makefile.am can reference $(SSE*_CFLAGS)
AC_SUBST(SSE_CFLAGS)
AC_SUBST(SSE2_CFLAGS)
AC_SUBST(SSE41_CFLAGS)
dnl used in gst/tcp
AC_CHECK_HEADERS([sys/socket.h],
[HAVE_SYS_SOCKET_H="yes"], [HAVE_SYS_SOCKET_H="no"], [AC_INCLUDES_DEFAULT])
......
......@@ -82,8 +82,12 @@ nodist_libgstaudio_@GST_API_VERSION@include_HEADERS = \
audio-enumtypes.h
# Private headers: shipped in the tarball but never installed. Every header
# included by a source file of this directory has to be listed here, including
# audio-resampler-x86-sse41.h, which audio-resampler-x86-sse41.c includes.
noinst_HEADERS = \
gstaudioutilsprivate.h \
audio-resampler-private.h \
audio-resampler-macros.h \
audio-resampler-x86.h \
audio-resampler-x86-sse.h \
audio-resampler-x86-sse2.h \
audio-resampler-x86-sse41.h \
audio-resampler-neon.h
libgstaudio_@GST_API_VERSION@_la_CFLAGS = $(GST_PLUGINS_BASE_CFLAGS) $(GST_BASE_CFLAGS) $(GST_CFLAGS) \
......@@ -93,6 +97,50 @@ libgstaudio_@GST_API_VERSION@_la_LIBADD = \
$(GST_BASE_LIBS) $(GST_LIBS) $(LIBM) $(ORC_LIBS)
libgstaudio_@GST_API_VERSION@_la_LDFLAGS = $(GST_LIB_LDFLAGS) $(GST_ALL_LDFLAGS) $(GST_LT_LDFLAGS)
# Arch-specific bits
# Each SIMD flavour is compiled as its own libtool convenience library so that
# its -m* flag applies to that one source file only; the convenience libraries
# are then folded into libgstaudio through LIBADD.
noinst_LTLIBRARIES =
if HAVE_X86
# Don't use full GST_LT_LDFLAGS in LDFLAGS because we get things like
# -version-info that cause a warning on private libs
# SSE variant: library CFLAGS plus $(SSE_CFLAGS)
noinst_LTLIBRARIES += libaudio_resampler_sse.la
libaudio_resampler_sse_la_SOURCES = audio-resampler-x86-sse.c
libaudio_resampler_sse_la_CFLAGS = \
$(libgstaudio_@GST_API_VERSION@_la_CFLAGS) \
$(SSE_CFLAGS)
libaudio_resampler_sse_la_LDFLAGS = \
$(GST_LIB_LDFLAGS) \
$(GST_ALL_LDFLAGS)
libgstaudio_@GST_API_VERSION@_la_LIBADD += libaudio_resampler_sse.la
# SSE2 variant: library CFLAGS plus $(SSE2_CFLAGS)
noinst_LTLIBRARIES += libaudio_resampler_sse2.la
libaudio_resampler_sse2_la_SOURCES = audio-resampler-x86-sse2.c
libaudio_resampler_sse2_la_CFLAGS = \
$(libgstaudio_@GST_API_VERSION@_la_CFLAGS) \
$(SSE2_CFLAGS)
libaudio_resampler_sse2_la_LDFLAGS = \
$(GST_LIB_LDFLAGS) \
$(GST_ALL_LDFLAGS)
libgstaudio_@GST_API_VERSION@_la_LIBADD += libaudio_resampler_sse2.la
# SSE4.1 variant: library CFLAGS plus $(SSE41_CFLAGS)
noinst_LTLIBRARIES += libaudio_resampler_sse41.la
libaudio_resampler_sse41_la_SOURCES = audio-resampler-x86-sse41.c
libaudio_resampler_sse41_la_CFLAGS = \
$(libgstaudio_@GST_API_VERSION@_la_CFLAGS) \
$(SSE41_CFLAGS)
libaudio_resampler_sse41_la_LDFLAGS = \
$(GST_LIB_LDFLAGS) \
$(GST_ALL_LDFLAGS)
libgstaudio_@GST_API_VERSION@_la_LIBADD += libaudio_resampler_sse41.la
endif
# Introspection
include $(top_srcdir)/common/gst-glib-gen.mak
if HAVE_INTROSPECTION
......
/* GStreamer
* Copyright (C) <2015> Wim Taymans <wim.taymans@gmail.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#ifndef __GST_AUDIO_RESAMPLER_MACROS_H__
#define __GST_AUDIO_RESAMPLER_MACROS_H__
#include <string.h>
#include "audio-resampler-private.h"
/* Shift amounts for the fixed-point sample formats. PRECISION_S32 is the
 * rounding shift applied to the 64-bit accumulator in the SSE4.1 gint32 inner
 * product; PRECISION_S16 is presumably the gint16 counterpart (TODO confirm
 * against the SSE2 code). */
#define PRECISION_S16 15
#define PRECISION_S32 31
/* Emits the prototype (no trailing semicolon) of get_taps_<type>_full().
 * MAKE_RESAMPLE_FUNC calls the function once per output sample, passing
 * pointers to its local sample index/phase and a 4-element coefficient
 * scratch array, and feeds the returned pointer to the inner product as the
 * taps. */
#define DECL_GET_TAPS_FULL_FUNC(type) \
gpointer \
get_taps_##type##_full (GstAudioResampler * resampler, \
gint *samp_index, gint *samp_phase, type icoeff[4])
/* Prototypes for every supported sample type. */
DECL_GET_TAPS_FULL_FUNC (gint16);
DECL_GET_TAPS_FULL_FUNC (gint32);
DECL_GET_TAPS_FULL_FUNC (gfloat);
DECL_GET_TAPS_FULL_FUNC (gdouble);
/* Emits the prototype (no trailing semicolon) of get_taps_<type>_<inter>(),
 * the taps lookup used when the filter table is interpolated (linear or
 * cubic). Same parameter list as the _full variant.
 *
 * The last line of the macro must NOT end in a backslash: a continuation
 * there would pull the first declaration below into the macro body, dropping
 * that prototype and breaking every later expansion. */
#define DECL_GET_TAPS_INTERPOLATE_FUNC(type, inter) \
gpointer \
get_taps_##type##_##inter (GstAudioResampler * resampler, \
gint *samp_index, gint *samp_phase, type icoeff[4])
/* Prototypes for every supported sample type and interpolation mode. */
DECL_GET_TAPS_INTERPOLATE_FUNC (gint16, linear);
DECL_GET_TAPS_INTERPOLATE_FUNC (gint32, linear);
DECL_GET_TAPS_INTERPOLATE_FUNC (gfloat, linear);
DECL_GET_TAPS_INTERPOLATE_FUNC (gdouble, linear);
DECL_GET_TAPS_INTERPOLATE_FUNC (gint16, cubic);
DECL_GET_TAPS_INTERPOLATE_FUNC (gint32, cubic);
DECL_GET_TAPS_INTERPOLATE_FUNC (gfloat, cubic);
DECL_GET_TAPS_INTERPOLATE_FUNC (gdouble, cubic);
/* Emits the prototype (no trailing semicolon) of
 * resample_<type>_<inter>_<channels>_<arch>(). The parameter list is the same
 * as the ResampleFunc typedef in audio-resampler-private.h. Use it with a
 * trailing ';' in headers to declare a variant, or through
 * MAKE_RESAMPLE_FUNC to define one. */
#define DECL_RESAMPLE_FUNC(type,inter,channels,arch) \
void \
resample_ ##type## _ ##inter## _ ##channels## _ ##arch (GstAudioResampler * resampler, \
gpointer in[], gsize in_len, gpointer out[], gsize out_len, \
gsize * consumed)
/* Defines resample_<type>_<inter>_<channels>_<arch>(), the generic resampling
 * loop around an arch-specific inner product.
 *
 * The translation unit expanding this macro must already provide
 * inner_product_<type>_<inter>_<channels>_<arch>() and have
 * get_taps_<type>_<inter>() declared.
 *
 * For each of resampler->blocks blocks c:
 *  - input is read from in[c]; output goes to out[c] when ostride is 1,
 *    otherwise to out[0] starting at element c and stepping by ostride;
 *  - the sample index/phase are restarted from the values stored in the
 *    resampler, so every block walks the same positions;
 *  - out_len samples are produced; for each one the input pointer is taken
 *    at the current sample index *before* get_taps is called with pointers
 *    to the index/phase (get_taps presumably advances them - TODO confirm);
 *  - afterwards, if in_len is larger than the final sample index, the
 *    remaining input is moved to the front of in[c] with memmove.
 *
 * On return *consumed holds the final sample index minus the one stored in
 * the resampler on entry; the stored index is then reset to 0 and the stored
 * phase is updated to the final phase. */
#define MAKE_RESAMPLE_FUNC(type,inter,channels,arch) \
DECL_RESAMPLE_FUNC (type, inter, channels, arch) \
{ \
gint c, di = 0; \
gint n_taps = resampler->n_taps; \
gint blocks = resampler->blocks; \
gint ostride = resampler->ostride; \
gint taps_stride = resampler->taps_stride; \
gint samp_index = 0; \
gint samp_phase = 0; \
\
for (c = 0; c < blocks; c++) { \
type *ip = in[c]; \
type *op = ostride == 1 ? out[c] : (type *)out[0] + c; \
\
samp_index = resampler->samp_index; \
samp_phase = resampler->samp_phase; \
\
for (di = 0; di < out_len; di++) { \
type *ipp, icoeff[4], *taps; \
\
ipp = &ip[samp_index * channels]; \
\
taps = get_taps_ ##type##_##inter \
(resampler, &samp_index, &samp_phase, icoeff); \
inner_product_ ##type##_##inter##_##channels##_##arch \
(op, ipp, taps, n_taps, icoeff, taps_stride); \
op += ostride; \
} \
if (in_len > samp_index) \
memmove (ip, &ip[samp_index * channels], \
(in_len - samp_index) * sizeof(type) * channels); \
} \
*consumed = samp_index - resampler->samp_index; \
\
resampler->samp_index = 0; \
resampler->samp_phase = samp_phase; \
}
/* Same as DECL_RESAMPLE_FUNC, but gives the function internal linkage. */
#define DECL_RESAMPLE_FUNC_STATIC(type,inter,channels,arch) \
static DECL_RESAMPLE_FUNC (type, inter, channels, arch)
/* Same as MAKE_RESAMPLE_FUNC, but the generated function is static, for
 * variants that are only referenced from the file that defines them. */
#define MAKE_RESAMPLE_FUNC_STATIC(type,inter,channels,arch) \
static MAKE_RESAMPLE_FUNC (type, inter, channels, arch)
#endif /* __GST_AUDIO_RESAMPLER_MACROS_H__ */
......@@ -650,17 +650,17 @@ interpolate_gfloat_cubic_neon (gpointer op, const gpointer ap,
"q10", "q11", "q12", "q13", "q14", "q15", "memory");
}
/* Instantiate the generic resample loop for every NEON-accelerated sample
 * type and interpolation mode. Each variant is instantiated exactly once and
 * with internal linkage (MAKE_RESAMPLE_FUNC_STATIC); instantiating the same
 * name with both MAKE_RESAMPLE_FUNC and the static form would give
 * conflicting duplicate definitions. */
MAKE_RESAMPLE_FUNC_STATIC (gint16, full, 1, neon);
MAKE_RESAMPLE_FUNC_STATIC (gint16, linear, 1, neon);
MAKE_RESAMPLE_FUNC_STATIC (gint16, cubic, 1, neon);
MAKE_RESAMPLE_FUNC_STATIC (gint32, full, 1, neon);
MAKE_RESAMPLE_FUNC_STATIC (gint32, linear, 1, neon);
MAKE_RESAMPLE_FUNC_STATIC (gint32, cubic, 1, neon);
MAKE_RESAMPLE_FUNC_STATIC (gfloat, full, 1, neon);
MAKE_RESAMPLE_FUNC_STATIC (gfloat, linear, 1, neon);
MAKE_RESAMPLE_FUNC_STATIC (gfloat, cubic, 1, neon);
static void
audio_resampler_check_neon (const gchar *option)
......
/* GStreamer
* Copyright (C) <2015> Wim Taymans <wim.taymans@gmail.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#ifndef __GST_AUDIO_RESAMPLER_PRIVATE_H__
#define __GST_AUDIO_RESAMPLER_PRIVATE_H__
#include "audio-resampler.h"
/* Contains a collection of all things found in other resamplers:
 * speex (filter construction, optimizations), ffmpeg (fixed phase filter, Blackman filter),
 * SRC (linear interpolation, fixed precomputed tables),...
 *
 * Supports:
 * - S16, S32, F32 and F64 formats
 * - nearest, linear and cubic interpolation
 * - sinc based interpolation with Kaiser or Blackman-Nuttall windows
 * - fully configurable Kaiser parameters
 * - dynamic linear or cubic interpolation of the filter table; this can
 * use less memory but more CPU
 * - full filter table, generated from optionally linear or cubic
 * interpolation of the filter table
 * - fixed filter table size with nearest neighbour phase, optionally
 * using precomputed tables
 * - dynamic samplerate changes
 * - x86 and NEON optimizations
 */
/* Type of the GstAudioResampler::convert_taps hook.
 * NOTE(review): presumably converts n_taps double-precision taps from
 * tmp_taps into the sample format at taps, scaled by weight - confirm
 * against the implementations in audio-resampler.c. */
typedef void (*ConvertTapsFunc) (gdouble * tmp_taps, gpointer taps,
gdouble weight, gint n_taps);
/* Type of the GstAudioResampler::interpolate hook; the
 * interpolate_<type>_<inter>_<arch>() functions share this signature. The
 * SSE float versions write len values to o, each a weighted sum of rows of a
 * that lie astride bytes apart, using the weights in icoeff. */
typedef void (*InterpolateFunc) (gpointer o, const gpointer a, gint len,
const gpointer icoeff, gint astride);
/* Type of the GstAudioResampler::resample hook; same parameter list as the
 * functions generated by DECL_RESAMPLE_FUNC / MAKE_RESAMPLE_FUNC. */
typedef void (*ResampleFunc) (GstAudioResampler * resampler, gpointer in[],
gsize in_len, gpointer out[], gsize out_len, gsize * consumed);
/* Type of the GstAudioResampler::deinterleave hook.
 * NOTE(review): presumably splits in_frames frames from in[] into the
 * per-block buffers in sbuf - confirm against the implementations. */
typedef void (*DeinterleaveFunc) (GstAudioResampler * resampler,
gpointer * sbuf, gpointer in[], gsize in_frames);
/* Private state of a GstAudioResampler. It lives in a private header so the
 * arch-specific resample functions generated by MAKE_RESAMPLE_FUNC can read
 * and update it directly. */
struct _GstAudioResampler
{
/* configuration */
GstAudioResamplerMethod method;
GstAudioResamplerFlags flags;
GstAudioFormat format;
GstStructure *options;
gint format_index;
gint channels;
gint in_rate;
gint out_rate;
gint bps;
/* element step between consecutive output samples of one block; 1 means
 * each block writes to its own out[c] array (see MAKE_RESAMPLE_FUNC) */
gint ostride;
GstAudioResamplerFilterMode filter_mode;
guint filter_threshold;
GstAudioResamplerFilterInterpolation filter_interpolation;
gdouble cutoff;
gdouble kaiser_beta;
/* for cubic */
gdouble b, c;
/* temp taps */
gpointer tmp_taps;
/* oversampled main filter table */
gint oversample;
/* length handed to the inner product for every output sample */
gint n_taps;
gpointer taps;
gpointer taps_mem;
/* handed to the inner product as its stride argument, which the SSE
 * versions apply as a byte offset between tap rows */
gsize taps_stride;
gint n_phases;
gint alloc_taps;
gint alloc_phases;
/* cached taps */
gpointer *cached_phases;
gpointer cached_taps;
gpointer cached_taps_mem;
gsize cached_taps_stride;
/* format/arch specific implementations */
ConvertTapsFunc convert_taps;
InterpolateFunc interpolate;
DeinterleaveFunc deinterleave;
ResampleFunc resample;
/* number of in[]/out[] entries the resample function iterates over */
gint blocks;
gint inc;
gint samp_inc;
gint samp_frac;
/* input position and phase where resampling resumes; the resample function
 * resets samp_index to 0 and stores the final phase when it returns */
gint samp_index;
gint samp_phase;
gint skip;
gpointer samples;
gsize samples_len;
gsize samples_avail;
gpointer *sbuf;
};
#endif /* __GST_AUDIO_RESAMPLER_PRIVATE_H__ */
/* GStreamer
* Copyright (C) <2016> Wim Taymans <wim.taymans@gmail.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include "audio-resampler-x86-sse.h"
#if defined (HAVE_XMMINTRIN_H) && defined(__SSE__)
#include <xmmintrin.h>
/* Dot product of len input samples with len filter taps, written to *o.
 *
 * a is read with unaligned loads, b with aligned loads, so b has to be
 * 16-byte aligned. Samples are consumed eight at a time with no tail
 * handling. icoeff and bstride are unused; they only keep the signature
 * identical to the interpolating variants. */
static inline void
inner_product_gfloat_full_1_sse (gfloat * o, const gfloat * a,
    const gfloat * b, gint len, const gfloat * icoeff, gint bstride)
{
  __m128 acc = _mm_setzero_ps ();
  gint pos;

  for (pos = 0; pos < len; pos += 8) {
    const __m128 first =
        _mm_mul_ps (_mm_loadu_ps (a + pos), _mm_load_ps (b + pos));
    const __m128 second =
        _mm_mul_ps (_mm_loadu_ps (a + pos + 4), _mm_load_ps (b + pos + 4));

    acc = _mm_add_ps (acc, first);
    acc = _mm_add_ps (acc, second);
  }

  /* horizontal add: fold the upper pair onto the lower pair, then lane 1
   * onto lane 0 */
  acc = _mm_add_ps (acc, _mm_movehl_ps (acc, acc));
  acc = _mm_add_ss (acc, _mm_shuffle_ps (acc, acc, _MM_SHUFFLE (1, 1, 1, 1)));
  _mm_store_ss (o, acc);
}
/* Inner product against two tap rows, blended linearly, written to *o.
 *
 * The second row starts bstride bytes after b. The dot product of a with
 * each row is accumulated separately and the result is
 *   (dot0 - dot1) * icoeff[0] + dot1
 * Tap rows are read with aligned loads; samples are consumed eight at a
 * time with no tail handling. */
static inline void
inner_product_gfloat_linear_1_sse (gfloat * o, const gfloat * a,
    const gfloat * b, gint len, const gfloat * icoeff, gint bstride)
{
  const gfloat *row0 = (const gfloat *) ((const gint8 *) b);
  const gfloat *row1 = (const gfloat *) ((const gint8 *) b + bstride);
  __m128 acc0 = _mm_setzero_ps ();
  __m128 acc1 = _mm_setzero_ps ();
  __m128 in;
  gint pos;

  for (pos = 0; pos < len; pos += 8) {
    in = _mm_loadu_ps (a + pos);
    acc0 = _mm_add_ps (acc0, _mm_mul_ps (in, _mm_load_ps (row0 + pos)));
    acc1 = _mm_add_ps (acc1, _mm_mul_ps (in, _mm_load_ps (row1 + pos)));

    in = _mm_loadu_ps (a + pos + 4);
    acc0 = _mm_add_ps (acc0, _mm_mul_ps (in, _mm_load_ps (row0 + pos + 4)));
    acc1 = _mm_add_ps (acc1, _mm_mul_ps (in, _mm_load_ps (row1 + pos + 4)));
  }

  /* blend the two rows, then reduce the four lanes to one value */
  acc0 = _mm_mul_ps (_mm_sub_ps (acc0, acc1), _mm_load1_ps (icoeff));
  acc0 = _mm_add_ps (acc0, acc1);
  acc0 = _mm_add_ps (acc0, _mm_movehl_ps (acc0, acc0));
  acc0 = _mm_add_ss (acc0,
      _mm_shuffle_ps (acc0, acc0, _MM_SHUFFLE (1, 1, 1, 1)));
  _mm_store_ss (o, acc0);
}
/* Inner product against four tap rows, combined with the four weights in
 * icoeff, written to *o.
 *
 * Row k starts k * bstride bytes after b. The dot product of a with every
 * row is accumulated separately, each accumulator is scaled by icoeff[k] and
 * the four are summed. Tap rows are read with aligned loads; samples are
 * consumed four at a time with no tail handling. */
static inline void
inner_product_gfloat_cubic_1_sse (gfloat * o, const gfloat * a,
    const gfloat * b, gint len, const gfloat * icoeff, gint bstride)
{
  const gint8 *base = (const gint8 *) b;
  const gfloat *rows[4];
  __m128 acc[4];
  const __m128 weights = _mm_loadu_ps (icoeff);
  __m128 in, total;
  gint pos, k;

  for (k = 0; k < 4; k++) {
    rows[k] = (const gfloat *) (base + k * bstride);
    acc[k] = _mm_setzero_ps ();
  }

  for (pos = 0; pos < len; pos += 4) {
    in = _mm_loadu_ps (a + pos);
    for (k = 0; k < 4; k++)
      acc[k] = _mm_add_ps (acc[k], _mm_mul_ps (in, _mm_load_ps (rows[k] + pos)));
  }

  /* scale every accumulator by its own weight, broadcast to all lanes */
  acc[0] = _mm_mul_ps (acc[0],
      _mm_shuffle_ps (weights, weights, _MM_SHUFFLE (0, 0, 0, 0)));
  acc[1] = _mm_mul_ps (acc[1],
      _mm_shuffle_ps (weights, weights, _MM_SHUFFLE (1, 1, 1, 1)));
  acc[2] = _mm_mul_ps (acc[2],
      _mm_shuffle_ps (weights, weights, _MM_SHUFFLE (2, 2, 2, 2)));
  acc[3] = _mm_mul_ps (acc[3],
      _mm_shuffle_ps (weights, weights, _MM_SHUFFLE (3, 3, 3, 3)));

  /* pairwise sum of the four rows, then reduce the lanes to one value */
  total = _mm_add_ps (_mm_add_ps (acc[0], acc[1]),
      _mm_add_ps (acc[2], acc[3]));
  total = _mm_add_ps (total, _mm_movehl_ps (total, total));
  total = _mm_add_ss (total,
      _mm_shuffle_ps (total, total, _MM_SHUFFLE (1, 1, 1, 1)));
  _mm_store_ss (o, total);
}
/* Generate resample_gfloat_{full,linear,cubic}_1_sse() around the inner
 * products above. These are deliberately not static: they are declared in
 * audio-resampler-x86-sse.h so code outside this file can reference them. */
MAKE_RESAMPLE_FUNC (gfloat, full, 1, sse);
MAKE_RESAMPLE_FUNC (gfloat, linear, 1, sse);
MAKE_RESAMPLE_FUNC (gfloat, cubic, 1, sse);
/* Blends two float rows into one:
 *   o[i] = row0[i] * ic[0] + row1[i] * ic[1]
 *
 * @op:      destination, written with aligned stores
 * @ap:      first source row; the second row starts astride bytes later
 * @len:     number of floats to produce, processed eight at a time with no
 *           tail handling
 * @icp:     two float weights
 * @astride: distance between the two source rows, in bytes
 *
 * Source rows are read with aligned loads. */
void
interpolate_gfloat_linear_sse (gpointer op, const gpointer ap,
    gint len, const gpointer icp, gint astride)
{
  gfloat *dst = op;
  const gfloat *weights = icp;
  const gfloat *row0 = (const gfloat *) ((const gint8 *) ap);
  const gfloat *row1 = (const gfloat *) ((const gint8 *) ap + astride);
  const __m128 w0 = _mm_load1_ps (weights);
  const __m128 w1 = _mm_load1_ps (weights + 1);
  gint pos, half;

  for (pos = 0; pos < len; pos += 8) {
    /* two groups of four floats per iteration */
    for (half = 0; half < 8; half += 4) {
      const __m128 p0 = _mm_mul_ps (_mm_load_ps (row0 + pos + half), w0);
      const __m128 p1 = _mm_mul_ps (_mm_load_ps (row1 + pos + half), w1);

      _mm_store_ps (dst + pos + half, _mm_add_ps (p0, p1));
    }
  }
}
/* Blends four float rows into one:
 *   o[i] = (row0[i] * ic[0] + row1[i] * ic[1])
 *        + (row2[i] * ic[2] + row3[i] * ic[3])
 *
 * @op:      destination, written with aligned stores
 * @ap:      first source row; row k starts k * astride bytes later
 * @len:     number of floats to produce, processed four at a time with no
 *           tail handling
 * @icp:     four float weights
 * @astride: distance between consecutive source rows, in bytes
 *
 * Source rows are read with aligned loads. */
void
interpolate_gfloat_cubic_sse (gpointer op, const gpointer ap,
    gint len, const gpointer icp, gint astride)
{
  gfloat *dst = op;
  const gfloat *weights = icp;
  const gint8 *base = (const gint8 *) ap;
  const gfloat *row0 = (const gfloat *) (base);
  const gfloat *row1 = (const gfloat *) (base + 1 * astride);
  const gfloat *row2 = (const gfloat *) (base + 2 * astride);
  const gfloat *row3 = (const gfloat *) (base + 3 * astride);
  const __m128 w0 = _mm_load1_ps (weights);
  const __m128 w1 = _mm_load1_ps (weights + 1);
  const __m128 w2 = _mm_load1_ps (weights + 2);
  const __m128 w3 = _mm_load1_ps (weights + 3);
  gint pos;

  for (pos = 0; pos < len; pos += 4) {
    const __m128 p0 = _mm_mul_ps (_mm_load_ps (row0 + pos), w0);
    const __m128 p1 = _mm_mul_ps (_mm_load_ps (row1 + pos), w1);
    const __m128 p2 = _mm_mul_ps (_mm_load_ps (row2 + pos), w2);
    const __m128 p3 = _mm_mul_ps (_mm_load_ps (row3 + pos), w3);
    const __m128 lower = _mm_add_ps (p0, p1);
    const __m128 upper = _mm_add_ps (p2, p3);

    _mm_store_ps (dst + pos, _mm_add_ps (lower, upper));
  }
}
#endif
/* GStreamer
* Copyright (C) <2016> Wim Taymans <wim.taymans@gmail.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#ifndef AUDIO_RESAMPLER_X86_SSE_H
#define AUDIO_RESAMPLER_X86_SSE_H
#include "audio-resampler-macros.h"
/* SSE resample functions for single-channel float data, one per filter
 * interpolation mode: resample_gfloat_{full,linear,cubic}_1_sse(). Defined in
 * audio-resampler-x86-sse.c when it is built with SSE enabled. */
DECL_RESAMPLE_FUNC (gfloat, full, 1, sse);
DECL_RESAMPLE_FUNC (gfloat, linear, 1, sse);
DECL_RESAMPLE_FUNC (gfloat, cubic, 1, sse);
/* Writes len floats to op, each the weighted sum of two rows of ap that lie
 * astride bytes apart, using the two weights in icp. */
void interpolate_gfloat_linear_sse (gpointer op, const gpointer ap,
gint len, const gpointer icp, gint astride);
/* Same as the linear version, but blends four rows with four weights. */
void interpolate_gfloat_cubic_sse (gpointer op, const gpointer ap,
gint len, const gpointer icp, gint astride);
#endif /* AUDIO_RESAMPLER_X86_SSE_H */
This diff is collapsed.
/* GStreamer
* Copyright (C) <2016> Wim Taymans <wim.taymans@gmail.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#ifndef AUDIO_RESAMPLER_X86_SSE2_H
#define AUDIO_RESAMPLER_X86_SSE2_H
#include "audio-resampler-macros.h"
/* SSE2 resample functions for single-channel gint16 and gdouble data, one per
 * filter interpolation mode. NOTE(review): the definitions are expected in
 * audio-resampler-x86-sse2.c, which is not visible here - confirm. */
DECL_RESAMPLE_FUNC (gint16, full, 1, sse2);
DECL_RESAMPLE_FUNC (gint16, linear, 1, sse2);
DECL_RESAMPLE_FUNC (gint16, cubic, 1, sse2);
DECL_RESAMPLE_FUNC (gdouble, full, 1, sse2);
DECL_RESAMPLE_FUNC (gdouble, linear, 1, sse2);
DECL_RESAMPLE_FUNC (gdouble, cubic, 1, sse2);
/* SSE2 filter-table interpolation helpers. Their parameter lists match the
 * InterpolateFunc typedef from audio-resampler-private.h. */
void
interpolate_gint16_linear_sse2 (gpointer op, const gpointer ap,
gint len, const gpointer icp, gint astride);
void
interpolate_gint16_cubic_sse2 (gpointer op, const gpointer ap,
gint len, const gpointer icp, gint astride);
void
interpolate_gdouble_linear_sse2 (gpointer op, const gpointer ap,
gint len, const gpointer icp, gint astride);
void
interpolate_gdouble_cubic_sse2 (gpointer op, const gpointer ap,
gint len, const gpointer icp, gint astride);
#endif /* AUDIO_RESAMPLER_X86_SSE2_H */
/* GStreamer
* Copyright (C) <2016> Wim Taymans <wim.taymans@gmail.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include "audio-resampler-x86-sse41.h"
#if 0
#define __SSE4_1__
#pragma GCC target("sse4.1")
#endif
#if defined (HAVE_SMMINTRIN_H) && defined (HAVE_EMMINTRIN_H) && defined(__SSE4_1__)
#include <emmintrin.h>
#include <smmintrin.h>
/* Fixed-point dot product of len gint32 samples with len gint32 taps.
 *
 * Products are accumulated as signed 64-bit values, rounded and shifted
 * right by PRECISION_S32 bits, then saturated to the gint32 range and stored
 * in *o.
 *
 * a is read with unaligned loads, b with aligned loads, so b has to be
 * 16-byte aligned. Samples are consumed eight at a time with no tail
 * handling. icoeff and bstride are unused; they only keep the signature
 * identical to the interpolating variants. */
static inline void
inner_product_gint32_full_1_sse41 (gint32 * o, const gint32 * a,
    const gint32 * b, gint len, const gint32 * icoeff, gint bstride)
{
  gint i = 0;
  __m128i sum, ta, tb;
  gint64 res;

  sum = _mm_setzero_si128 ();

  for (; i < len; i += 8) {
    /* _mm_mul_epi32 multiplies the low 32 bits of each 64-bit lane, so every
     * element is duplicated into both halves of a lane first */
    ta = _mm_loadu_si128 ((const __m128i *) (a + i));
    tb = _mm_load_si128 ((const __m128i *) (b + i));
    sum =
        _mm_add_epi64 (sum, _mm_mul_epi32 (_mm_unpacklo_epi32 (ta, ta),
            _mm_unpacklo_epi32 (tb, tb)));
    sum =
        _mm_add_epi64 (sum, _mm_mul_epi32 (_mm_unpackhi_epi32 (ta, ta),
            _mm_unpackhi_epi32 (tb, tb)));

    ta = _mm_loadu_si128 ((const __m128i *) (a + i + 4));
    tb = _mm_load_si128 ((const __m128i *) (b + i + 4));
    sum =
        _mm_add_epi64 (sum, _mm_mul_epi32 (_mm_unpacklo_epi32 (ta, ta),
            _mm_unpacklo_epi32 (tb, tb)));
    sum =
        _mm_add_epi64 (sum, _mm_mul_epi32 (_mm_unpackhi_epi32 (ta, ta),
            _mm_unpackhi_epi32 (tb, tb)));
  }
  /* add the two 64-bit partial sums together */
  sum = _mm_add_epi64 (sum, _mm_unpackhi_epi64 (sum, sum));
  /* Extract the low 64 bits through memory: _mm_cvtsi128_si64() is only
   * declared on x86-64, but this file is also built for 32-bit x86. */
  _mm_storel_epi64 ((__m128i *) & res, sum);

  res = (res + (1 << (PRECISION_S32 - 1))) >> PRECISION_S32;
  /* Saturate with the explicit 32-bit limits: (1L << 31) overflows where
   * long is only 32 bits wide. */
  *o = CLAMP (res, G_MININT32, G_MAXINT32);
}
static inline void
inner_product_gint32_linear_1_sse41 (gint32 * o, const gint32 * a,
const gint32 * b, gint len, const gint32 * icoeff, gint bstride)
{
gint i = 0;
gint64 res;
__m128i sum[2], ta, tb;
__m128i f = _mm_loadu_si128 ((__m128i *) icoeff);
const gint32 *c[2] = { (gint32 *) ((gint8 *) b + 0 * bstride),
(gint32 *) ((gint8 *) b + 1 * bstride)
};
sum[0] = sum[1] = _mm_setzero_si128 ();
for (; i < len; i += 4) {
ta = _mm_loadu_si128 ((__m128i *) (a + i));
tb = _mm_load_si128 ((__m128i *) (c[0] + i));
sum[0] = _mm_add_epi64 (sum[0], _mm_mul_epi32 (_mm_unpacklo_epi32 (ta, ta),
_mm_unpacklo_epi32 (tb, tb)));
sum[0] = _mm_add_epi64 (sum[0], _mm_mul_epi32 (_mm_unpackhi_epi32 (ta, ta),
_mm_unpackhi_epi32 (tb, tb)));
tb = _mm_load_si128 ((__m128i *) (c[1] + i));
sum[1] = _mm_add_epi64 (sum[1], _mm_mul_epi32 (_mm_unpacklo_epi32 (ta, ta),
_mm_unpacklo_epi32 (tb, tb)));
sum[1] = _mm_add_epi64 (sum[1], _mm_mul_epi32 (_mm_unpackhi_epi32 (ta, ta),
_mm_unpackhi_epi32 (tb, tb)));
}
sum[0] = _mm_srli_epi64 (sum[0], PRECISION_S32);
sum[1] = _mm_srli_epi64 (sum[1], PRECISION_S32);