Commit 34efc689 authored by Guillaume Desmottes's avatar Guillaume Desmottes 🐐
Browse files

add webrtc-audio-coding public library

This new lib contains the bare minimum to implement an iSAC encoder and
decoder.

The webrtc files have been copied from the revision as the existing
imported files (c8b569e0a7ad0b369e15f0197b3a558699ec8efa).
parent f13529b5
......@@ -111,3 +111,15 @@ pkgconfig.generate(
] + platform_cflags,
libraries: libwebrtc_audio_processing,
)
pkgconfig.generate(
name: 'webrtc-audio-coding',
description: 'WebRTC Audio Coding library',
version: meson.project_version(),
filebase: 'webrtc-audio-coding',
subdirs: 'webrtc_audio_processing',
extra_cflags: [
'-DWEBRTC_AUDIO_PROCESSING_ONLY_BUILD',
] + platform_cflags,
libraries: libwebrtc_audio_coding,
)
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/codecs/audio_decoder.h"
#include <assert.h>
#include "webrtc/base/checks.h"
namespace webrtc {
int AudioDecoder::Decode(const uint8_t* encoded, size_t encoded_len,
int sample_rate_hz, size_t max_decoded_bytes,
int16_t* decoded, SpeechType* speech_type) {
int duration = PacketDuration(encoded, encoded_len);
if (duration >= 0 &&
duration * Channels() * sizeof(int16_t) > max_decoded_bytes) {
return -1;
}
return DecodeInternal(encoded, encoded_len, sample_rate_hz, decoded,
speech_type);
}
int AudioDecoder::DecodeRedundant(const uint8_t* encoded, size_t encoded_len,
int sample_rate_hz, size_t max_decoded_bytes,
int16_t* decoded, SpeechType* speech_type) {
int duration = PacketDurationRedundant(encoded, encoded_len);
if (duration >= 0 &&
duration * Channels() * sizeof(int16_t) > max_decoded_bytes) {
return -1;
}
return DecodeRedundantInternal(encoded, encoded_len, sample_rate_hz, decoded,
speech_type);
}
int AudioDecoder::DecodeInternal(const uint8_t* encoded, size_t encoded_len,
int sample_rate_hz, int16_t* decoded,
SpeechType* speech_type) {
return kNotImplemented;
}
int AudioDecoder::DecodeRedundantInternal(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz, int16_t* decoded,
SpeechType* speech_type) {
return DecodeInternal(encoded, encoded_len, sample_rate_hz, decoded,
speech_type);
}
bool AudioDecoder::HasDecodePlc() const { return false; }
size_t AudioDecoder::DecodePlc(size_t num_frames, int16_t* decoded) {
return 0;
}
int AudioDecoder::IncomingPacket(const uint8_t* payload,
size_t payload_len,
uint16_t rtp_sequence_number,
uint32_t rtp_timestamp,
uint32_t arrival_timestamp) {
return 0;
}
int AudioDecoder::ErrorCode() { return 0; }
int AudioDecoder::PacketDuration(const uint8_t* encoded,
size_t encoded_len) const {
return kNotImplemented;
}
int AudioDecoder::PacketDurationRedundant(const uint8_t* encoded,
size_t encoded_len) const {
return kNotImplemented;
}
bool AudioDecoder::PacketHasFec(const uint8_t* encoded,
size_t encoded_len) const {
return false;
}
CNG_dec_inst* AudioDecoder::CngDecoderInstance() {
FATAL() << "Not a CNG decoder";
return NULL;
}
AudioDecoder::SpeechType AudioDecoder::ConvertSpeechType(int16_t type) {
switch (type) {
case 0: // TODO(hlundin): Both iSAC and Opus return 0 for speech.
case 1:
return kSpeech;
case 2:
return kComfortNoise;
default:
assert(false);
return kSpeech;
}
}
} // namespace webrtc
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_NETEQ_INCLUDE_AUDIO_DECODER_H_
#define WEBRTC_MODULES_AUDIO_CODING_NETEQ_INCLUDE_AUDIO_DECODER_H_
#include <stdlib.h> // NULL
#include "webrtc/base/constructormagic.h"
#include "webrtc/modules/audio_coding/codecs/cng/include/webrtc_cng.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// This is the interface class for decoders in NetEQ. Each codec type will have
// and implementation of this class.
class AudioDecoder {
public:
enum SpeechType {
kSpeech = 1,
kComfortNoise = 2
};
// Used by PacketDuration below. Save the value -1 for errors.
enum { kNotImplemented = -2 };
AudioDecoder() = default;
virtual ~AudioDecoder() = default;
// Decodes |encode_len| bytes from |encoded| and writes the result in
// |decoded|. The maximum bytes allowed to be written into |decoded| is
// |max_decoded_bytes|. Returns the total number of samples across all
// channels. If the decoder produced comfort noise, |speech_type|
// is set to kComfortNoise, otherwise it is kSpeech. The desired output
// sample rate is provided in |sample_rate_hz|, which must be valid for the
// codec at hand.
virtual int Decode(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
size_t max_decoded_bytes,
int16_t* decoded,
SpeechType* speech_type);
// Same as Decode(), but interfaces to the decoders redundant decode function.
// The default implementation simply calls the regular Decode() method.
virtual int DecodeRedundant(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
size_t max_decoded_bytes,
int16_t* decoded,
SpeechType* speech_type);
// Indicates if the decoder implements the DecodePlc method.
virtual bool HasDecodePlc() const;
// Calls the packet-loss concealment of the decoder to update the state after
// one or several lost packets. The caller has to make sure that the
// memory allocated in |decoded| should accommodate |num_frames| frames.
virtual size_t DecodePlc(size_t num_frames, int16_t* decoded);
// Resets the decoder state (empty buffers etc.).
virtual void Reset() = 0;
// Notifies the decoder of an incoming packet to NetEQ.
virtual int IncomingPacket(const uint8_t* payload,
size_t payload_len,
uint16_t rtp_sequence_number,
uint32_t rtp_timestamp,
uint32_t arrival_timestamp);
// Returns the last error code from the decoder.
virtual int ErrorCode();
// Returns the duration in samples-per-channel of the payload in |encoded|
// which is |encoded_len| bytes long. Returns kNotImplemented if no duration
// estimate is available, or -1 in case of an error.
virtual int PacketDuration(const uint8_t* encoded, size_t encoded_len) const;
// Returns the duration in samples-per-channel of the redandant payload in
// |encoded| which is |encoded_len| bytes long. Returns kNotImplemented if no
// duration estimate is available, or -1 in case of an error.
virtual int PacketDurationRedundant(const uint8_t* encoded,
size_t encoded_len) const;
// Detects whether a packet has forward error correction. The packet is
// comprised of the samples in |encoded| which is |encoded_len| bytes long.
// Returns true if the packet has FEC and false otherwise.
virtual bool PacketHasFec(const uint8_t* encoded, size_t encoded_len) const;
// If this is a CNG decoder, return the underlying CNG_dec_inst*. If this
// isn't a CNG decoder, don't call this method.
virtual CNG_dec_inst* CngDecoderInstance();
virtual size_t Channels() const = 0;
protected:
static SpeechType ConvertSpeechType(int16_t type);
virtual int DecodeInternal(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type);
virtual int DecodeRedundantInternal(const uint8_t* encoded,
size_t encoded_len,
int sample_rate_hz,
int16_t* decoded,
SpeechType* speech_type);
private:
RTC_DISALLOW_COPY_AND_ASSIGN(AudioDecoder);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_NETEQ_INCLUDE_AUDIO_DECODER_H_
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_coding/codecs/audio_encoder.h"
#include "webrtc/base/checks.h"
namespace webrtc {
AudioEncoder::EncodedInfo::EncodedInfo() = default;
AudioEncoder::EncodedInfo::~EncodedInfo() = default;
int AudioEncoder::RtpTimestampRateHz() const {
return SampleRateHz();
}
AudioEncoder::EncodedInfo AudioEncoder::Encode(uint32_t rtp_timestamp,
const int16_t* audio,
size_t num_samples_per_channel,
size_t max_encoded_bytes,
uint8_t* encoded) {
RTC_CHECK_EQ(num_samples_per_channel,
static_cast<size_t>(SampleRateHz() / 100));
EncodedInfo info =
EncodeInternal(rtp_timestamp, audio, max_encoded_bytes, encoded);
RTC_CHECK_LE(info.encoded_bytes, max_encoded_bytes);
return info;
}
bool AudioEncoder::SetFec(bool enable) {
return !enable;
}
bool AudioEncoder::SetDtx(bool enable) {
return !enable;
}
bool AudioEncoder::SetApplication(Application application) {
return false;
}
void AudioEncoder::SetMaxPlaybackRate(int frequency_hz) {}
void AudioEncoder::SetProjectedPacketLossRate(double fraction) {}
void AudioEncoder::SetTargetBitrate(int target_bps) {}
} // namespace webrtc
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_CODECS_AUDIO_ENCODER_H_
#define WEBRTC_MODULES_AUDIO_CODING_CODECS_AUDIO_ENCODER_H_
#include <algorithm>
#include <vector>
#include "webrtc/typedefs.h"
namespace webrtc {
// This is the interface class for encoders in AudioCoding module. Each codec
// type must have an implementation of this class.
class AudioEncoder {
public:
struct EncodedInfoLeaf {
size_t encoded_bytes = 0;
uint32_t encoded_timestamp = 0;
int payload_type = 0;
bool send_even_if_empty = false;
bool speech = true;
};
// This is the main struct for auxiliary encoding information. Each encoded
// packet should be accompanied by one EncodedInfo struct, containing the
// total number of |encoded_bytes|, the |encoded_timestamp| and the
// |payload_type|. If the packet contains redundant encodings, the |redundant|
// vector will be populated with EncodedInfoLeaf structs. Each struct in the
// vector represents one encoding; the order of structs in the vector is the
// same as the order in which the actual payloads are written to the byte
// stream. When EncoderInfoLeaf structs are present in the vector, the main
// struct's |encoded_bytes| will be the sum of all the |encoded_bytes| in the
// vector.
struct EncodedInfo : public EncodedInfoLeaf {
EncodedInfo();
~EncodedInfo();
std::vector<EncodedInfoLeaf> redundant;
};
virtual ~AudioEncoder() = default;
// Returns the maximum number of bytes that can be produced by the encoder
// at each Encode() call. The caller can use the return value to determine
// the size of the buffer that needs to be allocated. This value is allowed
// to depend on encoder parameters like bitrate, frame size etc., so if
// any of these change, the caller of Encode() is responsible for checking
// that the buffer is large enough by calling MaxEncodedBytes() again.
virtual size_t MaxEncodedBytes() const = 0;
// Returns the input sample rate in Hz and the number of input channels.
// These are constants set at instantiation time.
virtual int SampleRateHz() const = 0;
virtual int NumChannels() const = 0;
// Returns the rate at which the RTP timestamps are updated. The default
// implementation returns SampleRateHz().
virtual int RtpTimestampRateHz() const;
// Returns the number of 10 ms frames the encoder will put in the next
// packet. This value may only change when Encode() outputs a packet; i.e.,
// the encoder may vary the number of 10 ms frames from packet to packet, but
// it must decide the length of the next packet no later than when outputting
// the preceding packet.
virtual size_t Num10MsFramesInNextPacket() const = 0;
// Returns the maximum value that can be returned by
// Num10MsFramesInNextPacket().
virtual size_t Max10MsFramesInAPacket() const = 0;
// Returns the current target bitrate in bits/s. The value -1 means that the
// codec adapts the target automatically, and a current target cannot be
// provided.
virtual int GetTargetBitrate() const = 0;
// Accepts one 10 ms block of input audio (i.e., SampleRateHz() / 100 *
// NumChannels() samples). Multi-channel audio must be sample-interleaved.
// The encoder produces zero or more bytes of output in |encoded| and
// returns additional encoding information.
// The caller is responsible for making sure that |max_encoded_bytes| is
// not smaller than the number of bytes actually produced by the encoder.
// Encode() checks some preconditions, calls EncodeInternal() which does the
// actual work, and then checks some postconditions.
EncodedInfo Encode(uint32_t rtp_timestamp,
const int16_t* audio,
size_t num_samples_per_channel,
size_t max_encoded_bytes,
uint8_t* encoded);
virtual EncodedInfo EncodeInternal(uint32_t rtp_timestamp,
const int16_t* audio,
size_t max_encoded_bytes,
uint8_t* encoded) = 0;
// Resets the encoder to its starting state, discarding any input that has
// been fed to the encoder but not yet emitted in a packet.
virtual void Reset() = 0;
// Enables or disables codec-internal FEC (forward error correction). Returns
// true if the codec was able to comply. The default implementation returns
// true when asked to disable FEC and false when asked to enable it (meaning
// that FEC isn't supported).
virtual bool SetFec(bool enable);
// Enables or disables codec-internal VAD/DTX. Returns true if the codec was
// able to comply. The default implementation returns true when asked to
// disable DTX and false when asked to enable it (meaning that DTX isn't
// supported).
virtual bool SetDtx(bool enable);
// Sets the application mode. Returns true if the codec was able to comply.
// The default implementation just returns false.
enum class Application { kSpeech, kAudio };
virtual bool SetApplication(Application application);
// Tells the encoder about the highest sample rate the decoder is expected to
// use when decoding the bitstream. The encoder would typically use this
// information to adjust the quality of the encoding. The default
// implementation just returns true.
virtual void SetMaxPlaybackRate(int frequency_hz);
// Tells the encoder what the projected packet loss rate is. The rate is in
// the range [0.0, 1.0]. The encoder would typically use this information to
// adjust channel coding efforts, such as FEC. The default implementation
// does nothing.
virtual void SetProjectedPacketLossRate(double fraction);
// Tells the encoder what average bitrate we'd like it to produce. The
// encoder is free to adjust or disregard the given bitrate (the default
// implementation does the latter).
virtual void SetTargetBitrate(int target_bps);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_CODING_CODECS_AUDIO_ENCODER_H_
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_CODING_CODECS_CNG_MAIN_INCLUDE_WEBRTC_CNG_H_
#define WEBRTC_MODULES_AUDIO_CODING_CODECS_CNG_MAIN_INCLUDE_WEBRTC_CNG_H_
#include <stddef.h>
#include "webrtc/typedefs.h"
#ifdef __cplusplus
extern "C" {
#endif
#define WEBRTC_CNG_MAX_LPC_ORDER 12
#define WEBRTC_CNG_MAX_OUTSIZE_ORDER 640
/* Define Error codes. */
/* 6100 Encoder */
#define CNG_ENCODER_NOT_INITIATED 6120
#define CNG_DISALLOWED_LPC_ORDER 6130
#define CNG_DISALLOWED_FRAME_SIZE 6140
#define CNG_DISALLOWED_SAMPLING_FREQUENCY 6150
/* 6200 Decoder */
#define CNG_DECODER_NOT_INITIATED 6220
typedef struct WebRtcCngEncInst CNG_enc_inst;
typedef struct WebRtcCngDecInst CNG_dec_inst;
/****************************************************************************
* WebRtcCng_CreateEnc/Dec(...)
*
* These functions create an instance to the specified structure
*
* Input:
* - XXX_inst : Pointer to created instance that should be created
*
* Return value : 0 - Ok
* -1 - Error
*/
int16_t WebRtcCng_CreateEnc(CNG_enc_inst** cng_inst);
int16_t WebRtcCng_CreateDec(CNG_dec_inst** cng_inst);
/****************************************************************************
* WebRtcCng_InitEnc/Dec(...)
*
* This function initializes a instance
*
* Input:
* - cng_inst : Instance that should be initialized
*
* - fs : 8000 for narrowband and 16000 for wideband
* - interval : generate SID data every interval ms
* - quality : Number of refl. coefs, maximum allowed is 12
*
* Output:
* - cng_inst : Initialized instance
*
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcCng_InitEnc(CNG_enc_inst* cng_inst, int fs, int16_t interval,
int16_t quality);
void WebRtcCng_InitDec(CNG_dec_inst* cng_inst);
/****************************************************************************
* WebRtcCng_FreeEnc/Dec(...)
*
* These functions frees the dynamic memory of a specified instance
*
* Input:
* - cng_inst : Pointer to created instance that should be freed
*
* Return value : 0 - Ok
* -1 - Error
*/
int16_t WebRtcCng_FreeEnc(CNG_enc_inst* cng_inst);
int16_t WebRtcCng_FreeDec(CNG_dec_inst* cng_inst);
/****************************************************************************
* WebRtcCng_Encode(...)
*
* These functions analyzes background noise
*
* Input:
* - cng_inst : Pointer to created instance
* - speech : Signal to be analyzed
* - nrOfSamples : Size of speech vector
* - forceSID : not zero to force SID frame and reset
*
* Output:
* - bytesOut : Nr of bytes to transmit, might be 0
*
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcCng_Encode(CNG_enc_inst* cng_inst, int16_t* speech,
size_t nrOfSamples, uint8_t* SIDdata,
size_t* bytesOut, int16_t forceSID);
/****************************************************************************
* WebRtcCng_UpdateSid(...)
*
* These functions updates the CN state, when a new SID packet arrives
*
* Input:
* - cng_inst : Pointer to created instance that should be freed
* - SID : SID packet, all headers removed
* - length : Length in bytes of SID packet
*
* Return value : 0 - Ok
* -1 - Error
*/
int16_t WebRtcCng_UpdateSid(CNG_dec_inst* cng_inst, uint8_t* SID,
size_t length);
/****************************************************************************
* WebRtcCng_Generate(...)
*
* These functions generates CN data when needed
*
* Input:
* - cng_inst : Pointer to created instance that should be freed
* - outData : pointer to area to write CN data
* - nrOfSamples : How much data to generate
* - new_period : >0 if a new period of CNG, will reset history
*
* Return value : 0 - Ok
* -1 - Error
*/
int16_t WebRtcCng_Generate(CNG_dec_inst* cng_inst, int16_t* outData,
size_t nrOfSamples, int16_t new_period);
/*****************************************************************************
* WebRtcCng_GetErrorCodeEnc/Dec(...)
*
* This functions can be used to check the error code of a CNG instance. When
* a function returns -1 a error code will be set for that instance. The
* function below extract the code of the last error that occurred in the
* specified instance.
*