文章目录
- 前言
- 一、交叉编译
- 1.Pulse Audio webrtc-audio-processing
- 2.交叉编译
- 二、基于alsa进行实时3A处理
- 1.demo源码
- 2.注意项
- 3.效果展示
- 总结
前言
由于工作需要,硬件3A中的AEC效果实在太差,后面使用SpeexDSP的软3A,效果依旧不是很好,猜测是内部时延估计和时延补偿做的并不是很好,于是采用了webrtc的3A算法,这里记录一下3A移植过程。
|版本声明:山河君,未经博主允许,禁止转载
一、交叉编译
1.Pulse Audio webrtc-audio-processing
在linux下,webrtc 3A是比较好移植的,原因是Pulse Audio是支持webrtc 3A插件的,也就是说,不需要我们自己翻墙下载配置webrtc的环境以及编译链路,Pulse Audio已经帮我们做好了这一步,剩下的就是交叉编译的工作。
对应的gitlab地址:Pulse Audio webrtc-audio-processing
2.交叉编译
先选择好版本,截至到当前博客时间,最新版本是1.3.1,网上之前也有介绍的,基本都是0.3版本的
1.0版本和1.0之前的版本最大的区别就是编译器的不同,之前是通过脚本配置,1.0后都是使用meson 进行配置,所以需要下载meson 以及ninja(用于编译webrtc)
meson和代码下载之后,就需要配置交叉编译链路,meson对应交叉编译器环境需要我们自己写meson配置文件,在源文件目录下,打开cross_file.txt,内容如下:
[binaries]
c = '/usr/bin/aarch64-linux-gnu-gcc'
cpp = '/usr/bin/aarch64-linux-gnu-g++'
ar = '/usr/bin/aarch64-linux-gnu-ar'
strip = '/usr/bin/aarch64-linux-gnu-strip'
pkgconfig = '/usr/bin/aarch64-linux-gnu-pkg-config'
[host_machine]
system = 'linux'
cpu_family = 'aarch64'
cpu = 'armv8-a'
endian = 'little'
[paths]
prefix = '/home/aaron/workplace/webrtc-audio-processing/build'
在源文件目录下创建编译缓存目录以及安装目录
meson . build -Dprefix=$PWD/install --cross-file=cross_file.txt
ninja -C build
ninja -C build install
最后在install目录下可以看到编译好的文件
二、基于alsa进行实时3A处理
1.demo源码
编译好的完整demo下载:demo下载,如果没有积分的话就自己编译,这里只是少了demo的脚本
#include <stdio.h>
#include <stdlib.h>
#include <alsa/asoundlib.h>
#include <pthread.h>
#include <errno.h>
#include <mutex>
#include "modules/audio_processing/include/audio_processing.h"
// WebRTC APM processes audio in 10 ms chunks; these describe the APM-side format.
constexpr int sample_rate_hz = 16000;               // assumed sample rate: 16 kHz
constexpr size_t num_channels = 1;                  // assumed mono
constexpr size_t frame_size = sample_rate_hz / 100; // 10 ms frame size (samples)
// ALSA device-side format: stereo S16_LE at 16 kHz, 160-frame (10 ms) periods.
#define CHANNELS 2
#define SAMPLE_RATE 16000
#define PERIOD_SIZE 160
#define BUFFER_SIZE (PERIOD_SIZE * 2)
// Run flag for both worker threads; cleared by main() on key press.
// NOTE(review): shared across threads without atomics/locking — presumably
// acceptable for a demo, but confirm (a plain bool is not a sync primitive).
bool gRun = true;
// Shared state between the playback thread, the capture thread and main().
typedef struct
{
    // ---- playback side ----
    snd_pcm_t *playback_handle;                           // ALSA playback device
    FILE *pcm_file;                                       // far-end source file (far.pcm, mono 16-bit)
    int8_t m_pPlayoutBuffer[PERIOD_SIZE * 2 * 2];         // interleaved stereo playout frame (bytes)
    // NOTE(review): name says "20MS" but 160 frames at 16 kHz is 10 ms — confirm.
    int m_nPlayoutBufferSizeIn20MS = PERIOD_SIZE * 2 * 2; // playout frame size in bytes
    int m_nPlayoutFramesLeft = 0;                         // frames of the current playout buffer not yet written
    // ---- capture side ----
    snd_pcm_t *capture_handle;                            // ALSA capture device
    FILE *pcm_out_file;                                   // raw near-end dump (near.pcm)
    int8_t m_pRecordBuffer[PERIOD_SIZE * 2 * 2];          // interleaved stereo capture frame (bytes)
    int m_nRecordBufferSizeIn20MS = PERIOD_SIZE * 2 * 2;  // capture frame size in bytes
    int m_nRecordingFramesLeft = PERIOD_SIZE;             // frames still needed to complete the capture frame
    // ---- 3A processing ----
    FILE *pcm_3a_file;                                    // processed (3A) output dump (3a.pcm)
    FILE *pcm_ref_file;                                   // played reference signal dump (ref.pcm)
    int8_t m_pRefBuffer[PERIOD_SIZE * 2];                 // mono far-end reference frame, in bytes
    int8_t m_p3ABuffer[PERIOD_SIZE * 2];                  // mono 3A output frame, in bytes
    int8_t m_pNearBuffer[PERIOD_SIZE * 2];                // mono near-end frame, in bytes
    rtc::scoped_refptr<webrtc::AudioProcessing> apm;      // WebRTC audio processing module
} audio_data_t;
// Duplicates a mono 16-bit PCM buffer into an interleaved stereo buffer
// (left channel == right channel == source sample).
// pSrc: mono samples, `size` elements.
// pDts: destination; must hold at least 2 * size samples.
void monoTurnStereo(const int16_t *pSrc, int16_t *pDts, size_t size)
{
    // Fix: the loop index was a signed `int` compared against a `size_t`
    // (signed/unsigned mismatch; would misbehave for sizes > INT_MAX).
    for (size_t j = 0; j < size; j++)
    {
        pDts[2 * j] = pSrc[j];
        pDts[2 * j + 1] = pSrc[j];
    }
}
// Extracts the left channel from interleaved 16-bit stereo byte data.
// pSrc: interleaved stereo bytes (L-lo, L-hi, R-lo, R-hi, ...), `size` bytes.
// pDts: receives the left-channel bytes (size / 2 bytes).
void stereoTurnMono(const unsigned char *pSrc, unsigned char *pDts, size_t size)
{
    size_t out = 0;
    for (size_t frame = 0; frame < size; frame += 4)
    {
        // Copy the two bytes of the left sample; the right sample is skipped.
        pDts[out++] = pSrc[frame];
        pDts[out++] = pSrc[frame + 1];
    }
}
// Attempts to recover an ALSA stream after a failed read/write/avail call
// (typically an xrun or a suspend).
// nRet:          the negative errno returned by the failing ALSA call.
// pDeviceHandle: the stream to recover.
// Returns 1 if an xrun (-EPIPE) was recovered, 0 on other successful
// recovery, and a negative value when recovery failed.
int32_t errorRecovery(int32_t nRet, snd_pcm_t *pDeviceHandle)
{
    int st = snd_pcm_state(pDeviceHandle);
    printf("Trying to recover from %s error: %s nRet:(%d) (state:%d)\n",
           ((snd_pcm_stream(pDeviceHandle) ==
             SND_PCM_STREAM_CAPTURE)
                ? "capture"
                : "playout"),
           snd_strerror(nRet),
           nRet,
           st);
    int res = snd_pcm_recover(pDeviceHandle, nRet, 1);
    if (0 == res)
    {
        printf("Recovery - snd_pcm_recover OK\n");
        if ((nRet == -EPIPE || nRet == -ESTRPIPE) &&
            snd_pcm_stream(pDeviceHandle) == SND_PCM_STREAM_CAPTURE)
        {
            // For capture streams we also have to repeat the explicit start()
            // to get data flowing again.
            // Fix: the inner result no longer shadows the `nRet` parameter,
            // which the final -EPIPE check below depends on.
            int startRet = snd_pcm_start(pDeviceHandle);
            if (startRet != 0)
            {
                printf("Recovery - snd_pcm_start error: %d\n", startRet);
                return -1;
            }
        }
        if ((nRet == -EPIPE || nRet == -ESTRPIPE) &&
            snd_pcm_stream(pDeviceHandle) == SND_PCM_STREAM_PLAYBACK)
        {
            // Fix: this is the playback branch (the original comment said
            // "capture"). Playback may need an explicit prepare() before it
            // can be restarted.
            snd_pcm_state_t state = snd_pcm_state(pDeviceHandle);
            if (state != SND_PCM_STATE_PREPARED)
            {
                snd_pcm_prepare(pDeviceHandle);
            }
            int startRet = snd_pcm_start(pDeviceHandle);
            if (startRet != 0)
            {
                printf("Recovery - snd_pcm_start error: %s\n", snd_strerror(startRet));
                return -1;
            }
        }
        return -EPIPE == nRet ? 1 : 0;
    }
    else
    {
        printf("Unrecoverable alsa stream error: %d\n", res);
    }
    return res;
}
// Playback thread: reads 10 ms mono frames from far.pcm, duplicates them to
// interleaved stereo and writes them to the ALSA playback device. Each mono
// chunk that gets played is also appended to ref.pcm so it can serve as the
// far-end reference for AEC. The source file is looped on EOF.
void *playback_thread(void *arg)
{
    printf("playback_thread\n");
    audio_data_t *data = (audio_data_t *)arg;
    // Raise this thread to near-maximum SCHED_FIFO priority so playout is
    // not starved by ordinary threads.
    const int policy = SCHED_FIFO;
    const int max_prio = sched_get_priority_max(policy);
    sched_param param;
    param.sched_priority = max_prio - 1;
    // Fix: the published listing was HTML-entity-garbled here ("¶m");
    // the intended argument is &param.
    pthread_setschedparam(pthread_self(), policy, &param);
    while (gRun)
    {
        int nRet;
        snd_pcm_sframes_t sndFrames;
        snd_pcm_sframes_t sndAvailFrames;
        sndAvailFrames = snd_pcm_avail_update(data->playback_handle);
        if (sndAvailFrames < 0)
        {
            printf("playout snd_pcm_avail_update error: %s\n", snd_strerror(sndAvailFrames));
            errorRecovery(sndAvailFrames, data->playback_handle);
            continue;
        }
        else if (sndAvailFrames == 0)
        {
            // No room in the device buffer yet; wait briefly for space.
            nRet = snd_pcm_wait(data->playback_handle, 2);
            continue;
        }
        if (data->m_nPlayoutFramesLeft <= 0)
        {
            // Current frame fully written: fetch the next 10 ms mono frame.
            size_t frames = fread(data->m_pRefBuffer, 2, PERIOD_SIZE, data->pcm_file);
            if (frames == 0 || frames != PERIOD_SIZE)
            { // end of file — loop the source from the beginning
                fseek(data->pcm_file, 0, SEEK_SET);
                continue;
            }
            monoTurnStereo((int16_t *)data->m_pRefBuffer, (int16_t *)data->m_pPlayoutBuffer, PERIOD_SIZE);
            data->m_nPlayoutFramesLeft = frames;
        }
        // Write at most as many frames as remain in the current buffer.
        if ((uint32_t)(sndAvailFrames) > data->m_nPlayoutFramesLeft)
        {
            sndAvailFrames = (uint32_t)data->m_nPlayoutFramesLeft;
        }
        // Offset into the stereo buffer: bytes already consumed so far.
        int size = snd_pcm_frames_to_bytes(data->playback_handle, data->m_nPlayoutFramesLeft);
        sndFrames = snd_pcm_writei(data->playback_handle, &data->m_pPlayoutBuffer[data->m_nPlayoutBufferSizeIn20MS - size], sndAvailFrames);
        if (sndFrames < 0)
        {
            printf("playout snd_pcm_writei error: %s\n", snd_strerror(sndFrames));
            data->m_nPlayoutFramesLeft = 0;
            errorRecovery(sndFrames, data->playback_handle);
            continue;
        }
        else
        {
            // Dump the mono samples that were just played as the AEC reference.
            fwrite(&data->m_pRefBuffer[PERIOD_SIZE * 2 - data->m_nPlayoutFramesLeft * 2], 1, sndFrames * 2, data->pcm_ref_file);
            data->m_nPlayoutFramesLeft -= sndFrames;
        }
    }
    return NULL;
}
// Capture thread: reads interleaved stereo frames from the ALSA capture
// device, downmixes to mono, and once a full 10 ms frame is accumulated
// runs the WebRTC 3A pipeline (reverse stream = far-end reference, forward
// stream = near end), dumping raw and processed audio to files.
void *capture_thread(void *arg)
{
    printf("capture_thread\n");
    audio_data_t *data = (audio_data_t *)arg;
    // Promote this thread to near-maximum SCHED_FIFO priority so capture
    // keeps up with the hardware.
    const int policy = SCHED_FIFO;
    const int max_prio = sched_get_priority_max(policy);
    sched_param param;
    param.sched_priority = max_prio - 1;
    // Fix: the published listing was HTML-entity-garbled here ("¶m");
    // the intended argument is &param.
    pthread_setschedparam(pthread_self(), policy, &param);
    while (gRun)
    {
        snd_pcm_sframes_t sndFrames;
        snd_pcm_sframes_t sndAvailFrames;
        int8_t buffer[data->m_nRecordBufferSizeIn20MS]; // scratch for one read
        sndAvailFrames = snd_pcm_avail_update(data->capture_handle);
        if (sndAvailFrames < 0)
        {
            printf("capture snd_pcm_avail_update error: %s\n", snd_strerror(sndAvailFrames));
            errorRecovery(sndAvailFrames, data->capture_handle);
            continue;
        }
        else if (sndAvailFrames == 0)
        {
            continue;
        }
        // Never read more than what is still missing from the current frame.
        if ((uint32_t)(sndAvailFrames) > data->m_nRecordingFramesLeft)
            sndAvailFrames = data->m_nRecordingFramesLeft;
        sndFrames = snd_pcm_readi(data->capture_handle, buffer, sndAvailFrames);
        if (sndFrames < 0)
        {
            printf("capture snd_pcm_readi error: %s\n", snd_strerror(sndFrames));
            errorRecovery(sndFrames, data->capture_handle);
            continue;
        }
        else if (sndFrames > 0)
        {
            // Append the new frames at the current fill position.
            int nLeftSize = snd_pcm_frames_to_bytes(data->capture_handle, data->m_nRecordingFramesLeft);
            int size = snd_pcm_frames_to_bytes(data->capture_handle, sndFrames);
            memcpy(&data->m_pRecordBuffer[data->m_nRecordBufferSizeIn20MS - nLeftSize], buffer, size);
            data->m_nRecordingFramesLeft -= sndFrames;
        }
        if (!data->m_nRecordingFramesLeft)
        {
            // A complete 10 ms stereo frame has been captured.
            data->m_nRecordingFramesLeft = PERIOD_SIZE;
            // Downmix to mono (left channel) and dump the raw near-end signal.
            stereoTurnMono((unsigned char *)data->m_pRecordBuffer, (unsigned char *)data->m_pNearBuffer, PERIOD_SIZE * 2 * 2);
            fwrite(data->m_pNearBuffer, 1, PERIOD_SIZE * 2, data->pcm_out_file);
            webrtc::StreamConfig stream_config(sample_rate_hz, num_channels);
            // Feed the far-end reference first, then process the near end.
            // NOTE(review): m_pRefBuffer is concurrently refilled by the
            // playback thread without locking — confirm this race is
            // acceptable for the demo.
            if (data->apm->ProcessReverseStream((int16_t *)data->m_pRefBuffer, stream_config, stream_config, (int16_t *)data->m_pRefBuffer) != webrtc::AudioProcessing::kNoError)
            {
                printf("ProcessReverseStream fail\n");
            }
            if (data->apm->ProcessStream((int16_t *)data->m_pNearBuffer, stream_config, stream_config, (int16_t *)data->m_p3ABuffer) != webrtc::AudioProcessing::kNoError)
            {
                printf("ProcessStream fail\n");
            }
            fwrite(data->m_p3ABuffer, 1, PERIOD_SIZE * 2, data->pcm_3a_file);
        }
    }
    return NULL;
}
// Opens `device` for `stream` in non-blocking mode and applies the fixed
// hardware parameters used by this demo (S16_LE, stereo, 16 kHz,
// 160-frame period, 4-period buffer).
// Returns 0 on success or a negative ALSA error code (device closed on
// parameter failure).
int setup_pcm_device(snd_pcm_t **handle, const char *device, snd_pcm_stream_t stream)
{
    snd_pcm_hw_params_t *params;
    int err;
    if ((err = snd_pcm_open(handle, device, stream, SND_PCM_NONBLOCK)) < 0)
    {
        printf("无法打开 PCM 设备 %s: %s\n", device, snd_strerror(err));
        return err;
    }
    // Fix: the published listing was HTML-entity-garbled here ("¶ms");
    // the intended argument is &params.
    snd_pcm_hw_params_alloca(&params);
    snd_pcm_hw_params_any(*handle, params);
    snd_pcm_hw_params_set_access(*handle, params, SND_PCM_ACCESS_RW_INTERLEAVED);
    snd_pcm_hw_params_set_format(*handle, params, SND_PCM_FORMAT_S16_LE);
    snd_pcm_hw_params_set_channels(*handle, params, CHANNELS);
    snd_pcm_hw_params_set_rate(*handle, params, SAMPLE_RATE, 0);
    snd_pcm_hw_params_set_period_size(*handle, params, PERIOD_SIZE, 0);
    snd_pcm_hw_params_set_buffer_size(*handle, params, PERIOD_SIZE * 4);
    if ((err = snd_pcm_hw_params(*handle, params)) < 0)
    {
        printf("设置 PCM 参数失败: %s\n", snd_strerror(err));
        snd_pcm_close(*handle);
        return err;
    }
    return 0;
}
// Entry point: opens the PCM source/dump files, configures the WebRTC audio
// processing module (NS + AGC + AEC + VAD), sets up the full-duplex ALSA
// devices, runs the playback/capture threads and stops on any key press.
// Returns 0 on success, -1 on any fatal setup error.
int main(int argc, char *argv[])
{
    audio_data_t data;
    if ((data.pcm_file = fopen("./far.pcm", "rb")) == NULL ||
        (data.pcm_3a_file = fopen("./3a.pcm", "wb")) == NULL ||
        (data.pcm_out_file = fopen("./near.pcm", "wb")) == NULL ||
        (data.pcm_ref_file = fopen("./ref.pcm", "wb")) == NULL)
    {
        printf("fail to open file\n");
        return -1;
    }
    // --- WebRTC 3A configuration ---
    data.apm = webrtc::AudioProcessingBuilder().Create();
    webrtc::AudioProcessing::Config config;
    // Noise suppression
    config.noise_suppression.enabled = true;
    config.noise_suppression.level = webrtc::AudioProcessing::Config::NoiseSuppression::Level::kHigh;
    // Automatic gain control
    config.gain_controller1.enabled = true;
    config.gain_controller1.mode = webrtc::AudioProcessing::Config::GainController1::Mode::kAdaptiveDigital;
    config.gain_controller1.target_level_dbfs = 3; // target output level
    // Echo cancellation
    config.echo_canceller.enabled = true;
    config.echo_canceller.mobile_mode = false; // set true on mobile-class hardware
    // Voice activity detection (optional)
    config.voice_detection.enabled = true;
    // Apply the configuration
    data.apm->ApplyConfig(config);
    if (setup_pcm_device(&data.playback_handle, "hw:0,0", SND_PCM_STREAM_PLAYBACK) < 0)
    {
        // Fix: close every file opened above, not just pcm_file.
        fclose(data.pcm_file);
        fclose(data.pcm_out_file);
        fclose(data.pcm_3a_file);
        fclose(data.pcm_ref_file);
        return -1;
    }
    if (setup_pcm_device(&data.capture_handle, "hw:0,0", SND_PCM_STREAM_CAPTURE) < 0)
    {
        snd_pcm_close(data.playback_handle);
        fclose(data.pcm_file);
        fclose(data.pcm_out_file);
        fclose(data.pcm_3a_file);
        fclose(data.pcm_ref_file);
        return -1;
    }
    pthread_t play_thread, record_thread;
    pthread_create(&play_thread, NULL, playback_thread, &data);
    int nRet = snd_pcm_prepare(data.playback_handle);
    if (nRet < 0)
    {
        printf("playout snd_pcm_prepare failed (%s)\n", snd_strerror(nRet));
    }
    pthread_create(&record_thread, NULL, capture_thread, &data);
    nRet = snd_pcm_prepare(data.capture_handle);
    if (nRet < 0)
    {
        printf("capture snd_pcm_prepare failed:%s \n", snd_strerror(nRet));
    }
    nRet = snd_pcm_start(data.capture_handle);
    if (nRet < 0)
    {
        printf("capture snd_pcm_start err:%s\n", snd_strerror(nRet));
        nRet = snd_pcm_start(data.capture_handle);
        if (nRet < 0)
        {
            printf("capture snd_pcm_start 2nd try err:%s\n", snd_strerror(nRet));
            // Fix: was `return false` — false converts to 0, i.e. main
            // would report SUCCESS on a fatal startup failure.
            return -1;
        }
    }
    getchar(); // run until any key is pressed
    gRun = false;
    pthread_join(play_thread, NULL);
    pthread_join(record_thread, NULL);
    snd_pcm_close(data.playback_handle);
    snd_pcm_close(data.capture_handle);
    fclose(data.pcm_file);
    fclose(data.pcm_out_file);
    fclose(data.pcm_3a_file);
    fclose(data.pcm_ref_file);
    printf("end................... \n");
    return 0;
}
2.注意项
- webrtc audio processing是基于10ms为一帧进行处理的
- 当前版本中可以设置3A配置等级,具体3A参数调参请参考我另一篇文章音频3A一——webrtc源码3A的启用方法和具体流程
- 对于资源消耗,如果没有对资源特别要求,或者其他特殊情况,尽量不要追求类似于WebRtcAec_Process,WebRtcAgc_Process这种方式单独使用3A的某一个模块,而是通过audio_processing进行处理
- 对于时延,如果有固定时延,应该对于AEC进行设置
3.效果展示
远端参考信号
近端采集信号
回声消除后的信号
总结
webrtc不愧是音视频领域的顶尖,值得我们学习的东西太多了。实际上demo里对于设备的读写,也是从webrtc中摘录出来的。
如果对您有所帮助,请帮忙点个赞吧!