diff --git a/patches/0001-AECM-MIPS-Use-uintptr_t-for-pointer-arithmetic.patch b/patches/0001-AECM-MIPS-Use-uintptr_t-for-pointer-arithmetic.patch new file mode 100644 index 0000000000000000000000000000000000000000..322371960127102aaf064a664a97f1be4913016d --- /dev/null +++ b/patches/0001-AECM-MIPS-Use-uintptr_t-for-pointer-arithmetic.patch @@ -0,0 +1,68 @@ +From 297fd4f2efc53b6d49433eaad91a8e09a0f9cbec Mon Sep 17 00:00:00 2001 +From: Alper Nebi Yasak <alpernebiyasak@gmail.com> +Date: Fri, 25 Oct 2024 00:40:59 +0300 +Subject: [PATCH] AECM: MIPS: Use uintptr_t for pointer arithmetic +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Trying to compile the MIPS-specific AECM audio processing file for +mips64el on Debian results in the following errors: + + ../webrtc/modules/audio_processing/aecm/aecm_core_mips.cc: In function ‘int webrtc::WebRtcAecm_ProcessBlock(AecmCore*, const int16_t*, const int16_t*, const int16_t*, int16_t*)’: + ../webrtc/modules/audio_processing/aecm/aecm_core_mips.cc:955:30: error: cast from ‘int16_t*’ {aka ‘short int*’} to ‘uint32_t’ {aka ‘unsigned int’} loses precision [-fpermissive] + 955 | int16_t* fft = (int16_t*)(((uint32_t)fft_buf + 31) & ~31); + | ^~~~~~~~~~~~~~~~~ + ../webrtc/modules/audio_processing/aecm/aecm_core_mips.cc:955:18: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] + 955 | int16_t* fft = (int16_t*)(((uint32_t)fft_buf + 31) & ~31); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ../webrtc/modules/audio_processing/aecm/aecm_core_mips.cc:956:36: error: cast from ‘int32_t*’ {aka ‘int*’} to ‘uint32_t’ {aka ‘unsigned int’} loses precision [-fpermissive] + 956 | int32_t* echoEst32 = (int32_t*)(((uint32_t)echoEst32_buf + 31) & ~31); + | ^~~~~~~~~~~~~~~~~~~~~~~ + ../webrtc/modules/audio_processing/aecm/aecm_core_mips.cc:956:24: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] + 956 | int32_t* echoEst32 = 
(int32_t*)(((uint32_t)echoEst32_buf + 31) & ~31); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ../webrtc/modules/audio_processing/aecm/aecm_core_mips.cc:957:40: error: cast from ‘int32_t*’ {aka ‘int*’} to ‘uint32_t’ {aka ‘unsigned int’} loses precision [-fpermissive] + 957 | ComplexInt16* dfw = (ComplexInt16*)(((uint32_t)dfw_buf + 31) & ~31); + | ^~~~~~~~~~~~~~~~~ + ../webrtc/modules/audio_processing/aecm/aecm_core_mips.cc:957:23: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] + 957 | ComplexInt16* dfw = (ComplexInt16*)(((uint32_t)dfw_buf + 31) & ~31); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + ../webrtc/modules/audio_processing/aecm/aecm_core_mips.cc:958:40: error: cast from ‘int32_t*’ {aka ‘int*’} to ‘uint32_t’ {aka ‘unsigned int’} loses precision [-fpermissive] + 958 | ComplexInt16* efw = (ComplexInt16*)(((uint32_t)efw_buf + 31) & ~31); + | ^~~~~~~~~~~~~~~~~ + ../webrtc/modules/audio_processing/aecm/aecm_core_mips.cc:958:23: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] + 958 | ComplexInt16* efw = (ComplexInt16*)(((uint32_t)efw_buf + 31) & ~31); + | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Presumably, this file was written for 32-bit MIPS so the author used +uint32_t to do pointer arithmetic over these arrays. Fix the errors by +using uintptr_t to work with pointers. 
+ +Signed-off-by: Alper Nebi Yasak <alpernebiyasak@gmail.com> +--- + webrtc/modules/audio_processing/aecm/aecm_core_mips.cc | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/webrtc/modules/audio_processing/aecm/aecm_core_mips.cc b/webrtc/modules/audio_processing/aecm/aecm_core_mips.cc +index 16b03cf..07c785e 100644 +--- a/webrtc/modules/audio_processing/aecm/aecm_core_mips.cc ++++ b/webrtc/modules/audio_processing/aecm/aecm_core_mips.cc +@@ -952,10 +952,10 @@ int WebRtcAecm_ProcessBlock(AecmCore* aecm, + int32_t dfw_buf[PART_LEN2 + 8]; + int32_t efw_buf[PART_LEN2 + 8]; + +- int16_t* fft = (int16_t*)(((uint32_t)fft_buf + 31) & ~31); +- int32_t* echoEst32 = (int32_t*)(((uint32_t)echoEst32_buf + 31) & ~31); +- ComplexInt16* dfw = (ComplexInt16*)(((uint32_t)dfw_buf + 31) & ~31); +- ComplexInt16* efw = (ComplexInt16*)(((uint32_t)efw_buf + 31) & ~31); ++ int16_t* fft = (int16_t*)(((uintptr_t)fft_buf + 31) & ~31); ++ int32_t* echoEst32 = (int32_t*)(((uintptr_t)echoEst32_buf + 31) & ~31); ++ ComplexInt16* dfw = (ComplexInt16*)(((uintptr_t)dfw_buf + 31) & ~31); ++ ComplexInt16* efw = (ComplexInt16*)(((uintptr_t)efw_buf + 31) & ~31); + + int16_t hnl[PART_LEN1]; + int16_t numPosCoef = 0; +-- +2.47.1 + diff --git a/patches/0001-Add-support-for-BSD-systems.patch b/patches/0001-Add-support-for-BSD-systems.patch new file mode 100644 index 0000000000000000000000000000000000000000..ac7c29255f3603d009513eaa70f35d8105180f11 --- /dev/null +++ b/patches/0001-Add-support-for-BSD-systems.patch @@ -0,0 +1,110 @@ +From 2a318149f8d5094c82306b8091a7a8b5194bf9c1 Mon Sep 17 00:00:00 2001 +From: Jan Beich <jbeich@FreeBSD.org> +Date: Tue, 7 Jan 2020 18:08:24 +0000 +Subject: [PATCH] Add support for BSD systems + +webrtc/rtc_base/checks.cc:158:28: error: use of undeclared identifier 'LAST_SYSTEM_ERROR' + 158 | file, line, LAST_SYSTEM_ERROR, message); + | ^ +webrtc/rtc_base/checks.cc:220:16: error: use of undeclared identifier 'LAST_SYSTEM_ERROR' + 220 | 
LAST_SYSTEM_ERROR); + | ^ +In file included from webrtc/rtc_base/platform_thread_types.cc:11: +webrtc/rtc_base/platform_thread_types.h:47:1: error: unknown type name 'PlatformThreadId' + 47 | PlatformThreadId CurrentThreadId(); + | ^ +webrtc/rtc_base/platform_thread_types.h:52:1: error: unknown type name 'PlatformThreadRef' + 52 | PlatformThreadRef CurrentThreadRef(); + | ^ +webrtc/rtc_base/platform_thread_types.h:55:29: error: unknown type name 'PlatformThreadRef' + 55 | bool IsThreadRefEqual(const PlatformThreadRef& a, const PlatformThreadRef& b); + | ^ +webrtc/rtc_base/platform_thread_types.h:55:57: error: unknown type name 'PlatformThreadRef' + 55 | bool IsThreadRefEqual(const PlatformThreadRef& a, const PlatformThreadRef& b); + | ^ +webrtc/rtc_base/platform_thread_types.cc:37:1: error: unknown type name 'PlatformThreadId' + 37 | PlatformThreadId CurrentThreadId() { + | ^ +webrtc/rtc_base/platform_thread_types.cc:58:1: error: unknown type name 'PlatformThreadRef' + 58 | PlatformThreadRef CurrentThreadRef() { + | ^ +webrtc/rtc_base/platform_thread_types.cc:68:29: error: unknown type name 'PlatformThreadRef' + 68 | bool IsThreadRefEqual(const PlatformThreadRef& a, const PlatformThreadRef& b) { + | ^ +webrtc/rtc_base/platform_thread_types.cc:68:57: error: unknown type name 'PlatformThreadRef' + 68 | bool IsThreadRefEqual(const PlatformThreadRef& a, const PlatformThreadRef& b) { + | ^ +In file included from webrtc/rtc_base/event_tracer.cc:30: +In file included from webrtc/api/sequence_checker.h:15: +In file included from webrtc/rtc_base/synchronization/sequence_checker_internal.h:18: +webrtc/rtc_base/synchronization/mutex.h:28:2: error: Unsupported platform. + 28 | #error Unsupported platform. 
+ | ^ +webrtc/rtc_base/synchronization/mutex.h:52:3: error: unknown type name 'MutexImpl' + 52 | MutexImpl impl_; + | ^ +--- + meson.build | 5 +++++ + webrtc/rtc_base/platform_thread_types.cc | 16 ++++++++++++++++ + 2 files changed, 21 insertions(+) + +diff --git a/meson.build b/meson.build +index 8d85d56..05a434a 100644 +--- a/meson.build ++++ b/meson.build +@@ -87,6 +87,11 @@ elif host_system == 'linux' + os_deps += [cc.find_library('rt', required : false)] + os_deps += [dependency('threads')] + have_posix = true ++elif (host_system == 'dragonfly' or host_system == 'freebsd' or ++ host_system == 'netbsd' or host_system == 'openbsd') ++ os_cflags += ['-DWEBRTC_BSD', '-DWEBRTC_THREAD_RR'] ++ os_deps += [dependency('threads')] ++ have_posix = true + elif host_system == 'windows' + platform_cflags += ['-DWEBRTC_WIN', '-D_WIN32'] + # this one is for MinGW to get format specifiers from inttypes.h in C++ +diff --git a/webrtc/rtc_base/platform_thread_types.cc b/webrtc/rtc_base/platform_thread_types.cc +index d64ea68..e98e8ec 100644 +--- a/webrtc/rtc_base/platform_thread_types.cc ++++ b/webrtc/rtc_base/platform_thread_types.cc +@@ -15,6 +15,12 @@ + #include <sys/syscall.h> + #endif + ++#if defined(__DragonFly__) || defined(__FreeBSD__) || defined(__OpenBSD__) // WEBRTC_BSD ++#include <pthread_np.h> ++#elif defined(__NetBSD__) // WEBRTC_BSD ++#include <lwp.h> ++#endif ++ + #if defined(WEBRTC_WIN) + #include "rtc_base/arraysize.h" + +@@ -46,6 +52,12 @@ PlatformThreadId CurrentThreadId() { + return zx_thread_self(); + #elif defined(WEBRTC_LINUX) + return syscall(__NR_gettid); ++#elif defined(__DragonFly__) || defined(__FreeBSD__) // WEBRTC_BSD ++ return pthread_getthreadid_np(); ++#elif defined(__NetBSD__) // WEBRTC_BSD ++ return _lwp_self(); ++#elif defined(__OpenBSD__) // WEBRTC_BSD ++ return getthrid(); + #elif defined(__EMSCRIPTEN__) + return static_cast<PlatformThreadId>(pthread_self()); + #else +@@ -116,6 +128,10 @@ void SetCurrentThreadName(const char* name) { + 
prctl(PR_SET_NAME, reinterpret_cast<unsigned long>(name)); // NOLINT + #elif defined(WEBRTC_MAC) || defined(WEBRTC_IOS) + pthread_setname_np(name); ++#elif defined(__DragonFly__) || defined(__FreeBSD__) || defined(__OpenBSD__) // WEBRTC_BSD ++ pthread_set_name_np(pthread_self(), name); ++#elif defined(__NetBSD__) // WEBRTC_BSD ++ pthread_setname_np(pthread_self(), "%s", (void*)name); + #elif defined(WEBRTC_FUCHSIA) + zx_status_t status = zx_object_set_property(zx_thread_self(), ZX_PROP_NAME, + name, strlen(name)); +-- +2.47.1 + diff --git a/patches/0001-Allow-disabling-inline-SSE.patch b/patches/0001-Allow-disabling-inline-SSE.patch new file mode 100644 index 0000000000000000000000000000000000000000..ff209499efc37ecf145c3b58a511074fd929f58e --- /dev/null +++ b/patches/0001-Allow-disabling-inline-SSE.patch @@ -0,0 +1,336 @@ +From fed81a77c9a9bc366556f732324cdc5f9e7b09e9 Mon Sep 17 00:00:00 2001 +From: Arun Raghavan <arun@asymptotic.io> +Date: Thu, 26 Dec 2024 14:24:40 -0500 +Subject: [PATCH] Allow disabling inline SSE + +Should make building on i686 without SSE feasible. 
+ +Fixes: https://gitlab.freedesktop.org/pulseaudio/webrtc-audio-processing/-/issues/5 +--- + meson.build | 14 ++++++++++++-- + meson_options.txt | 5 ++++- + .../audio_processing/aec3/adaptive_fir_filter.cc | 14 ++++++++++---- + .../aec3/adaptive_fir_filter_erl.cc | 6 ++++-- + webrtc/modules/audio_processing/aec3/fft_data.h | 4 +++- + .../audio_processing/aec3/matched_filter.cc | 6 ++++-- + webrtc/modules/audio_processing/aec3/vector_math.h | 8 +++++--- + .../audio_processing/agc2/rnn_vad/vector_math.h | 4 +++- + webrtc/third_party/pffft/meson.build | 2 +- + 9 files changed, 46 insertions(+), 17 deletions(-) + +diff --git a/meson.build b/meson.build +index 811d795..ebf053a 100644 +--- a/meson.build ++++ b/meson.build +@@ -110,6 +110,7 @@ have_neon = false + have_mips = false + have_mips64 = false + have_x86 = false ++have_inline_sse = false + have_avx2 = false + if host_machine.cpu_family() == 'arm' + if cc.compiles('''#ifndef __ARM_ARCH_ISA_ARM +@@ -140,10 +141,19 @@ if host_machine.cpu_family() == 'mips64' + endif + if ['x86', 'x86_64'].contains(host_machine.cpu_family()) + have_x86 = true +- # This is unconditionally enabled for now, actual usage is determined by +- # runtime CPU detection, so we're just assuming the compiler supports avx2 ++ # AVX2 support is unconditionally available, since all the code (compiled ++ # with -mavx2) is in separate files from runtime detection (which should not ++ # be compiled with SIMD flags for cases where the CPU does not support it). ++ # Unfortunately, a bunch of SSE code is inline with the runtime detection, ++ # and we can't support that on systems that don't support SSE. 
+ have_avx2 = true + arch_cflags += ['-DWEBRTC_ENABLE_AVX2'] ++ if get_option('inline-sse') ++ have_inline_sse = true ++ else ++ have_inline_sse = false ++ arch_cflags += ['-DWAP_DISABLE_INLINE_SSE'] ++ endif + endif + + neon_opt = get_option('neon') +diff --git a/meson_options.txt b/meson_options.txt +index c939fb9..d08f356 100644 +--- a/meson_options.txt ++++ b/meson_options.txt +@@ -3,4 +3,7 @@ option('gnustl', type: 'feature', + description: 'Use gnustl for a c++ library implementation (only used on Android)') + option('neon', type: 'combo', + choices: ['no', 'yes', 'auto', 'runtime'], +- description: '') ++ description: 'Enable NEON optimisations') ++option('inline-sse', type: 'boolean', ++ value: true, ++ description: 'Enable inline SSE/SSE2 optimisations (i.e. assume CPU supports SSE/SSE2)') +diff --git a/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.cc b/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.cc +index 917aa95..ded0511 100644 +--- a/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.cc ++++ b/webrtc/modules/audio_processing/aec3/adaptive_fir_filter.cc +@@ -16,7 +16,7 @@ + #if defined(WEBRTC_HAS_NEON) + #include <arm_neon.h> + #endif +-#if defined(WEBRTC_ARCH_X86_FAMILY) ++#if defined(WEBRTC_ARCH_X86_FAMILY) && !defined(WAP_DISABLE_INLINE_SSE) + #include <emmintrin.h> + #endif + #include <math.h> +@@ -88,7 +88,7 @@ void ComputeFrequencyResponse_Neon( + } + #endif + +-#if defined(WEBRTC_ARCH_X86_FAMILY) ++#if defined(WEBRTC_ARCH_X86_FAMILY) && !defined(WAP_DISABLE_INLINE_SSE) + // Computes and stores the frequency response of the filter. + void ComputeFrequencyResponse_Sse2( + size_t num_partitions, +@@ -212,7 +212,7 @@ void AdaptPartitions_Neon(const RenderBuffer& render_buffer, + } + #endif + +-#if defined(WEBRTC_ARCH_X86_FAMILY) ++#if defined(WEBRTC_ARCH_X86_FAMILY) && !defined(WAP_DISABLE_INLINE_SSE) + // Adapts the filter partitions. 
(SSE2 variant) + void AdaptPartitions_Sse2(const RenderBuffer& render_buffer, + const FftData& G, +@@ -377,7 +377,7 @@ void ApplyFilter_Neon(const RenderBuffer& render_buffer, + } + #endif + +-#if defined(WEBRTC_ARCH_X86_FAMILY) ++#if defined(WEBRTC_ARCH_X86_FAMILY) && !defined(WAP_DISABLE_INLINE_SSE) + // Produces the filter output (SSE2 variant). + void ApplyFilter_Sse2(const RenderBuffer& render_buffer, + size_t num_partitions, +@@ -557,9 +557,11 @@ void AdaptiveFirFilter::Filter(const RenderBuffer& render_buffer, + RTC_DCHECK(S); + switch (optimization_) { + #if defined(WEBRTC_ARCH_X86_FAMILY) ++#if !defined(WAP_DISABLE_INLINE_SSE) + case Aec3Optimization::kSse2: + aec3::ApplyFilter_Sse2(render_buffer, current_size_partitions_, H_, S); + break; ++#endif + case Aec3Optimization::kAvx2: + aec3::ApplyFilter_Avx2(render_buffer, current_size_partitions_, H_, S); + break; +@@ -601,9 +603,11 @@ void AdaptiveFirFilter::ComputeFrequencyResponse( + + switch (optimization_) { + #if defined(WEBRTC_ARCH_X86_FAMILY) ++#if !defined(WAP_DISABLE_INLINE_SSE) + case Aec3Optimization::kSse2: + aec3::ComputeFrequencyResponse_Sse2(current_size_partitions_, H_, H2); + break; ++#endif + case Aec3Optimization::kAvx2: + aec3::ComputeFrequencyResponse_Avx2(current_size_partitions_, H_, H2); + break; +@@ -626,10 +630,12 @@ void AdaptiveFirFilter::AdaptAndUpdateSize(const RenderBuffer& render_buffer, + // Adapt the filter. 
+ switch (optimization_) { + #if defined(WEBRTC_ARCH_X86_FAMILY) ++#if !defined(WAP_DISABLE_INLINE_SSE) + case Aec3Optimization::kSse2: + aec3::AdaptPartitions_Sse2(render_buffer, G, current_size_partitions_, + &H_); + break; ++#endif + case Aec3Optimization::kAvx2: + aec3::AdaptPartitions_Avx2(render_buffer, G, current_size_partitions_, + &H_); +diff --git a/webrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl.cc b/webrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl.cc +index 45b8813..920d51c 100644 +--- a/webrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl.cc ++++ b/webrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl.cc +@@ -16,7 +16,7 @@ + #if defined(WEBRTC_HAS_NEON) + #include <arm_neon.h> + #endif +-#if defined(WEBRTC_ARCH_X86_FAMILY) ++#if defined(WEBRTC_ARCH_X86_FAMILY) && !defined(WAP_DISABLE_INLINE_SSE) + #include <emmintrin.h> + #endif + +@@ -54,7 +54,7 @@ void ErlComputer_NEON( + } + #endif + +-#if defined(WEBRTC_ARCH_X86_FAMILY) ++#if defined(WEBRTC_ARCH_X86_FAMILY) && !defined(WAP_DISABLE_INLINE_SSE) + // Computes and stores the echo return loss estimate of the filter, which is the + // sum of the partition frequency responses. + void ErlComputer_SSE2( +@@ -82,9 +82,11 @@ void ComputeErl(const Aec3Optimization& optimization, + // Update the frequency response and echo return loss for the filter. + switch (optimization) { + #if defined(WEBRTC_ARCH_X86_FAMILY) ++#if !defined(WAP_DISABLE_INLINE_SSE) + case Aec3Optimization::kSse2: + aec3::ErlComputer_SSE2(H2, erl); + break; ++#endif + case Aec3Optimization::kAvx2: + aec3::ErlComputer_AVX2(H2, erl); + break; +diff --git a/webrtc/modules/audio_processing/aec3/fft_data.h b/webrtc/modules/audio_processing/aec3/fft_data.h +index 9c25e78..892407d 100644 +--- a/webrtc/modules/audio_processing/aec3/fft_data.h ++++ b/webrtc/modules/audio_processing/aec3/fft_data.h +@@ -14,7 +14,7 @@ + // Defines WEBRTC_ARCH_X86_FAMILY, used below. 
+ #include "rtc_base/system/arch.h" + +-#if defined(WEBRTC_ARCH_X86_FAMILY) ++#if defined(WEBRTC_ARCH_X86_FAMILY) && !defined(WAP_DISABLE_INLINE_SSE) + #include <emmintrin.h> + #endif + #include <algorithm> +@@ -49,6 +49,7 @@ struct FftData { + RTC_DCHECK_EQ(kFftLengthBy2Plus1, power_spectrum.size()); + switch (optimization) { + #if defined(WEBRTC_ARCH_X86_FAMILY) ++#if !defined(WAP_DISABLE_INLINE_SSE) + case Aec3Optimization::kSse2: { + constexpr int kNumFourBinBands = kFftLengthBy2 / 4; + constexpr int kLimit = kNumFourBinBands * 4; +@@ -63,6 +64,7 @@ struct FftData { + power_spectrum[kFftLengthBy2] = re[kFftLengthBy2] * re[kFftLengthBy2] + + im[kFftLengthBy2] * im[kFftLengthBy2]; + } break; ++#endif + case Aec3Optimization::kAvx2: + SpectrumAVX2(power_spectrum); + break; +diff --git a/webrtc/modules/audio_processing/aec3/matched_filter.cc b/webrtc/modules/audio_processing/aec3/matched_filter.cc +index 59a3b46..86f365a 100644 +--- a/webrtc/modules/audio_processing/aec3/matched_filter.cc ++++ b/webrtc/modules/audio_processing/aec3/matched_filter.cc +@@ -15,7 +15,7 @@ + #if defined(WEBRTC_HAS_NEON) + #include <arm_neon.h> + #endif +-#if defined(WEBRTC_ARCH_X86_FAMILY) ++#if defined(WEBRTC_ARCH_X86_FAMILY) && !defined(WAP_DISABLE_INLINE_SSE) + #include <emmintrin.h> + #endif + #include <algorithm> +@@ -286,7 +286,7 @@ void MatchedFilterCore_NEON(size_t x_start_index, + + #endif + +-#if defined(WEBRTC_ARCH_X86_FAMILY) ++#if defined(WEBRTC_ARCH_X86_FAMILY) && !defined(WAP_DISABLE_INLINE_SSE) + + void MatchedFilterCore_AccumulatedError_SSE2( + size_t x_start_index, +@@ -695,12 +695,14 @@ void MatchedFilter::Update(const DownsampledRenderBuffer& render_buffer, + + switch (optimization_) { + #if defined(WEBRTC_ARCH_X86_FAMILY) ++#if !defined(WAP_DISABLE_INLINE_SSE) + case Aec3Optimization::kSse2: + aec3::MatchedFilterCore_SSE2( + x_start_index, x2_sum_threshold, smoothing, render_buffer.buffer, y, + filters_[n], &filters_updated, &error_sum, compute_pre_echo, + 
instantaneous_accumulated_error_, scratch_memory_); + break; ++#endif + case Aec3Optimization::kAvx2: + aec3::MatchedFilterCore_AVX2( + x_start_index, x2_sum_threshold, smoothing, render_buffer.buffer, y, +diff --git a/webrtc/modules/audio_processing/aec3/vector_math.h b/webrtc/modules/audio_processing/aec3/vector_math.h +index e4d1381..1506a44 100644 +--- a/webrtc/modules/audio_processing/aec3/vector_math.h ++++ b/webrtc/modules/audio_processing/aec3/vector_math.h +@@ -17,7 +17,7 @@ + #if defined(WEBRTC_HAS_NEON) + #include <arm_neon.h> + #endif +-#if defined(WEBRTC_ARCH_X86_FAMILY) ++#if defined(WEBRTC_ARCH_X86_FAMILY) && !defined(WAP_DISABLE_INLINE_SSE) + #include <emmintrin.h> + #endif + #include <math.h> +@@ -43,7 +43,7 @@ class VectorMath { + void SqrtAVX2(rtc::ArrayView<float> x); + void Sqrt(rtc::ArrayView<float> x) { + switch (optimization_) { +-#if defined(WEBRTC_ARCH_X86_FAMILY) ++#if defined(WEBRTC_ARCH_X86_FAMILY) && !defined(WAP_DISABLE_INLINE_SSE) + case Aec3Optimization::kSse2: { + const int x_size = static_cast<int>(x.size()); + const int vector_limit = x_size >> 2; +@@ -123,7 +123,7 @@ class VectorMath { + RTC_DCHECK_EQ(z.size(), x.size()); + RTC_DCHECK_EQ(z.size(), y.size()); + switch (optimization_) { +-#if defined(WEBRTC_ARCH_X86_FAMILY) ++#if defined(WEBRTC_ARCH_X86_FAMILY) && !defined(WAP_DISABLE_INLINE_SSE) + case Aec3Optimization::kSse2: { + const int x_size = static_cast<int>(x.size()); + const int vector_limit = x_size >> 2; +@@ -174,6 +174,7 @@ class VectorMath { + RTC_DCHECK_EQ(z.size(), x.size()); + switch (optimization_) { + #if defined(WEBRTC_ARCH_X86_FAMILY) ++#if !defined(WAP_DISABLE_INLINE_SSE) + case Aec3Optimization::kSse2: { + const int x_size = static_cast<int>(x.size()); + const int vector_limit = x_size >> 2; +@@ -190,6 +191,7 @@ class VectorMath { + z[j] += x[j]; + } + } break; ++#endif + case Aec3Optimization::kAvx2: + AccumulateAVX2(x, z); + break; +diff --git a/webrtc/modules/audio_processing/agc2/rnn_vad/vector_math.h 
b/webrtc/modules/audio_processing/agc2/rnn_vad/vector_math.h +index 47f6811..f965086 100644 +--- a/webrtc/modules/audio_processing/agc2/rnn_vad/vector_math.h ++++ b/webrtc/modules/audio_processing/agc2/rnn_vad/vector_math.h +@@ -17,7 +17,7 @@ + #if defined(WEBRTC_HAS_NEON) + #include <arm_neon.h> + #endif +-#if defined(WEBRTC_ARCH_X86_FAMILY) ++#if defined(WEBRTC_ARCH_X86_FAMILY) && !defined(WAP_DISABLE_INLINE_SSE) + #include <emmintrin.h> + #endif + +@@ -47,6 +47,7 @@ class VectorMath { + if (cpu_features_.avx2) { + return DotProductAvx2(x, y); + } else if (cpu_features_.sse2) { ++#if !defined(WAP_DISABLE_INLINE_SSE) + __m128 accumulator = _mm_setzero_ps(); + constexpr int kBlockSizeLog2 = 2; + constexpr int kBlockSize = 1 << kBlockSizeLog2; +@@ -72,6 +73,7 @@ class VectorMath { + dot_product += x[i] * y[i]; + } + return dot_product; ++#endif + } + #elif defined(WEBRTC_HAS_NEON) && defined(WEBRTC_ARCH_ARM64) + if (cpu_features_.neon) { +diff --git a/webrtc/third_party/pffft/meson.build b/webrtc/third_party/pffft/meson.build +index c1eb5c6..cf4c9c7 100644 +--- a/webrtc/third_party/pffft/meson.build ++++ b/webrtc/third_party/pffft/meson.build +@@ -4,7 +4,7 @@ pffft_sources = [ + + pffft_cflags = [ '-D_GNU_SOURCE' ] + +-if (have_arm and not have_neon) or (have_mips and host_machine.endian() == 'little') or have_mips64 ++if not have_inline_sse or (have_arm and not have_neon) or (have_mips and host_machine.endian() == 'little') or have_mips64 + pffft_cflags += [ '-DPFFFT_SIMD_DISABLE' ] + endif + +-- +2.47.1 + diff --git a/patches/0001-Fix-up-XMM-intrinsics-usage-on-MSVC.patch b/patches/0001-Fix-up-XMM-intrinsics-usage-on-MSVC.patch new file mode 100644 index 0000000000000000000000000000000000000000..fb8cce85ee7c887930cb8a1f12032d615d2548ab --- /dev/null +++ b/patches/0001-Fix-up-XMM-intrinsics-usage-on-MSVC.patch @@ -0,0 +1,68 @@ +From ad563b095cea13730ca95e77d50e352ea9e344a9 Mon Sep 17 00:00:00 2001 +From: Arun Raghavan <arun@asymptotic.io> +Date: Fri, 15 Dec 2023 
16:06:05 -0500 +Subject: [PATCH] Fix up XMM intrinsics usage on MSVC + +Reapplying 0a0050746bc20ef970b9f260d485e4367c7ba854 after M131 bump. +--- + .../aec3/matched_filter_avx2.cc | 30 ++++++++++++------- + 1 file changed, 20 insertions(+), 10 deletions(-) + +diff --git a/webrtc/modules/audio_processing/aec3/matched_filter_avx2.cc b/webrtc/modules/audio_processing/aec3/matched_filter_avx2.cc +index 8c2ffcb..65a1b76 100644 +--- a/webrtc/modules/audio_processing/aec3/matched_filter_avx2.cc ++++ b/webrtc/modules/audio_processing/aec3/matched_filter_avx2.cc +@@ -13,6 +13,16 @@ + #include "modules/audio_processing/aec3/matched_filter.h" + #include "rtc_base/checks.h" + ++#ifdef _MSC_VER ++// Visual Studio ++#define LOOKUP_M128(v, i) v.m128_f32[i] ++#define LOOKUP_M256(v, i) v.m256_f32[i] ++#else ++// GCC/Clang ++#define LOOKUP_M128(v, i) v[i] ++#define LOOKUP_M256(v, i) v[i] ++#endif ++ + namespace webrtc { + namespace aec3 { + +@@ -81,14 +91,14 @@ void MatchedFilterCore_AccumulatedError_AVX2( + s_inst_256_8 = _mm256_mul_ps(h_k_8, x_k_8); + s_inst_hadd_256 = _mm256_hadd_ps(s_inst_256, s_inst_256_8); + s_inst_hadd_256 = _mm256_hadd_ps(s_inst_hadd_256, s_inst_hadd_256); +- s_acum += s_inst_hadd_256[0]; +- e_128[0] = s_acum - y[i]; +- s_acum += s_inst_hadd_256[4]; +- e_128[1] = s_acum - y[i]; +- s_acum += s_inst_hadd_256[1]; +- e_128[2] = s_acum - y[i]; +- s_acum += s_inst_hadd_256[5]; +- e_128[3] = s_acum - y[i]; ++ s_acum += LOOKUP_M256(s_inst_hadd_256, 0); ++ LOOKUP_M128(e_128, 0) = s_acum - y[i]; ++ s_acum += LOOKUP_M256(s_inst_hadd_256,4); ++ LOOKUP_M128(e_128, 1) = s_acum - y[i]; ++ s_acum += LOOKUP_M256(s_inst_hadd_256,1); ++ LOOKUP_M128(e_128, 2) = s_acum - y[i]; ++ s_acum += LOOKUP_M256(s_inst_hadd_256,5); ++ LOOKUP_M128(e_128, 3) = s_acum - y[i]; + + __m128 accumulated_error = _mm_load_ps(a_p); + accumulated_error = _mm_fmadd_ps(e_128, e_128, accumulated_error); +@@ -209,8 +219,8 @@ void MatchedFilterCore_AVX2(size_t x_start_index, + x2_sum_256 = 
_mm256_add_ps(x2_sum_256, x2_sum_256_8); + s_256 = _mm256_add_ps(s_256, s_256_8); + __m128 sum = hsum_ab(x2_sum_256, s_256); +- x2_sum += sum[0]; +- s += sum[1]; ++ x2_sum += LOOKUP_M128(sum, 0); ++ s += LOOKUP_M128(sum, 1); + + // Compute the matched filter error. + float e = y[i] - s; +-- +2.47.1 + diff --git a/patches/0001-common_audio-Add-MIPS_DSP_R1_LE-guard-for-vector-sca.patch b/patches/0001-common_audio-Add-MIPS_DSP_R1_LE-guard-for-vector-sca.patch new file mode 100644 index 0000000000000000000000000000000000000000..e17850b069b18b43c3bab4be8279c14fc57f841d --- /dev/null +++ b/patches/0001-common_audio-Add-MIPS_DSP_R1_LE-guard-for-vector-sca.patch @@ -0,0 +1,45 @@ +From 4a17c682e9a173c27feec9e67fb8c4c36090b1a6 Mon Sep 17 00:00:00 2001 +From: Alper Nebi Yasak <alpernebiyasak@gmail.com> +Date: Fri, 25 Oct 2024 01:53:16 +0300 +Subject: [PATCH] common_audio: Add MIPS_DSP_R1_LE guard for vector scaling ops + +The MIPS-specific source for vector scaling operations fails to build on +Debian's mips64el: + + [97/303] Compiling C object webrtc/common_audio/libcommon_audio.a.p/signal_processing_vector_scaling_operations_mips.c.o + FAILED: webrtc/common_audio/libcommon_audio.a.p/signal_processing_vector_scaling_operations_mips.c.o + cc [...] webrtc/common_audio/libcommon_audio.a.p/signal_processing_vector_scaling_operations_mips.c.o.d -o webrtc/common_audio/libcommon_audio.a.p/signal_processing_vector_scaling_operations_mips.c.o -c ../webrtc/common_audio/signal_processing/vector_scaling_operations_mips.c + /tmp/cc7UGPkY.s: Assembler messages: + /tmp/cc7UGPkY.s:57: Error: opcode not supported on this processor: mips64r2 (mips64r2) `extrv_r.w $3,$ac0,$8' + ninja: build stopped: subcommand failed. + +The EXTRV_R.W instruction it uses is part of DSP extensions for this +architecture. In signal_processing_library.h, this function's prototype +is guarded with #if defined(MIPS_DSP_R1_LE). Guard the implementation +like that as well to fix the error. 
+ +Signed-off-by: Alper Nebi Yasak <alpernebiyasak@gmail.com> +--- + .../signal_processing/vector_scaling_operations_mips.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/webrtc/common_audio/signal_processing/vector_scaling_operations_mips.c b/webrtc/common_audio/signal_processing/vector_scaling_operations_mips.c +index ba2d26d..08ca293 100644 +--- a/webrtc/common_audio/signal_processing/vector_scaling_operations_mips.c ++++ b/webrtc/common_audio/signal_processing/vector_scaling_operations_mips.c +@@ -16,6 +16,7 @@ + + #include "common_audio/signal_processing/include/signal_processing_library.h" + ++#if defined(MIPS_DSP_R1_LE) + int WebRtcSpl_ScaleAndAddVectorsWithRound_mips(const int16_t* in_vector1, + int16_t in_vector1_scale, + const int16_t* in_vector2, +@@ -55,3 +56,4 @@ int WebRtcSpl_ScaleAndAddVectorsWithRound_mips(const int16_t* in_vector1, + } + return 0; + } ++#endif +-- +2.47.1 +