文章目录
- 前言
- 一、交叉编译
- 1.Pulse Audio webrtc-audio-processing
- 2.交叉编译
- 二、基于alsa进行实时3A处理
- 1.demo源码
- 2.注意项
- 3.效果展示
- 总结
前言
由于工作需要,硬件3A中的AEC效果实在太差,后面使用SpeexDSP的软3A,效果依旧不是很好,猜测是内部时延估计和时延补偿做的并不是很好,于是采用了webrtc的3A算法,这里记录一下3A移植过程。
|版本声明:山河君,未经博主允许,禁止转载
一、交叉编译
1.Pulse Audio webrtc-audio-processing
在linux下,webrtc 3A是比较好移植的,原因是Pulse Audio是支持webrtc 3A插件的,也就是说,不需要我们自己翻墙下载配置webrtc的环境以及编译链路,Pulse Audio已经帮我们做好了这一步,剩下的就是交叉编译的工作。
对应的gitlab地址:Pulse Audio webrtc-audio-processing
2.交叉编译
先选择好版本,截至到当前博客时间,最新版本是1.3.1,网上之前也有介绍的,基本都是0.3版本的
1.0版本和1.0之前的版本最大的区别就是编译器的不同,之前是通过脚本配置,1.0后都是使用meson 进行配置,所以需要下载meson 以及ninja(用于编译webrtc)
meson和代码下载之后,就需要配置交叉编译链路,meson对应交叉编译器环境需要我们自己写meson配置文件,在源文件目录下,打开cross_file.txt,内容如下:
[binaries]
c = '/usr/bin/aarch64-linux-gnu-gcc'
cpp = '/usr/bin/aarch64-linux-gnu-g++'
ar = '/usr/bin/aarch64-linux-gnu-ar'
strip = '/usr/bin/aarch64-linux-gnu-strip'
pkgconfig = '/usr/bin/aarch64-linux-gnu-pkg-config'
[host_machine]
system = 'linux'
cpu_family = 'aarch64'
cpu = 'armv8-a'
endian = 'little'
[paths]
prefix = '/home/aaron/workplace/webrtc-audio-processing/build'
在源文件目录下创建编译缓存目录以及安装目录
meson . build -Dprefix=$PWD/install --cross-file=cross_file.txt
ninja -C build
ninja -C build install
最后在install目录下可以看到编译好的文件
二、基于alsa进行实时3A处理
1.demo源码
编译好的完整demo下载:demo下载,如果没有积分的话就自己编译,这里只是少了demo的脚本
#include <stdio.h>
#include <stdlib.h>
#include <alsa/asoundlib.h>
#include <pthread.h>
#include <errno.h>
#include <mutex>
#include "modules/audio_processing/include/audio_processing.h"
// WebRTC APM processes audio in 10 ms chunks; these describe the APM-side format.
constexpr int sample_rate_hz = 16000;               // assumed sample rate: 16 kHz
constexpr size_t num_channels = 1;                  // assumed mono
constexpr size_t frame_size = sample_rate_hz / 100; // 10 ms frame size (samples)
// ALSA device-side format: stereo S16_LE at 16 kHz, 160-frame (10 ms) periods.
#define CHANNELS 2
#define SAMPLE_RATE 16000
#define PERIOD_SIZE 160
#define BUFFER_SIZE (PERIOD_SIZE * 2)
// Run flag for both worker threads; cleared by main() on key press.
// NOTE(review): shared across threads without atomics/locking — presumably
// acceptable for a demo, but confirm (a plain bool is not a sync primitive).
bool gRun = true;
// Shared state between the playback thread, the capture thread and main().
typedef struct
{
    // ---- playback side ----
    snd_pcm_t *playback_handle;                           // ALSA playback device
    FILE *pcm_file;                                       // far-end source file (far.pcm, mono 16-bit)
    int8_t m_pPlayoutBuffer[PERIOD_SIZE * 2 * 2];         // interleaved stereo playout frame (bytes)
    // NOTE(review): name says "20MS" but 160 frames at 16 kHz is 10 ms — confirm.
    int m_nPlayoutBufferSizeIn20MS = PERIOD_SIZE * 2 * 2; // playout frame size in bytes
    int m_nPlayoutFramesLeft = 0;                         // frames of the current playout buffer not yet written
    // ---- capture side ----
    snd_pcm_t *capture_handle;                            // ALSA capture device
    FILE *pcm_out_file;                                   // raw near-end dump (near.pcm)
    int8_t m_pRecordBuffer[PERIOD_SIZE * 2 * 2];          // interleaved stereo capture frame (bytes)
    int m_nRecordBufferSizeIn20MS = PERIOD_SIZE * 2 * 2;  // capture frame size in bytes
    int m_nRecordingFramesLeft = PERIOD_SIZE;             // frames still needed to complete the capture frame
    // ---- 3A processing ----
    FILE *pcm_3a_file;                                    // processed (3A) output dump (3a.pcm)
    FILE *pcm_ref_file;                                   // played reference signal dump (ref.pcm)
    int8_t m_pRefBuffer[PERIOD_SIZE * 2];                 // mono far-end reference frame, in bytes
    int8_t m_p3ABuffer[PERIOD_SIZE * 2];                  // mono 3A output frame, in bytes
    int8_t m_pNearBuffer[PERIOD_SIZE * 2];                // mono near-end frame, in bytes
    rtc::scoped_refptr<webrtc::AudioProcessing> apm;      // WebRTC audio processing module
} audio_data_t;
// Duplicates a mono 16-bit PCM buffer into an interleaved stereo buffer
// (left channel == right channel == source sample).
// pSrc: mono samples, `size` elements.
// pDts: destination; must hold at least 2 * size samples.
void monoTurnStereo(const int16_t *pSrc, int16_t *pDts, size_t size)
{
    // Fix: the loop index was a signed `int` compared against a `size_t`
    // (signed/unsigned mismatch; would misbehave for sizes > INT_MAX).
    for (size_t j = 0; j < size; j++)
    {
        pDts[2 * j] = pSrc[j];
        pDts[2 * j + 1] = pSrc[j];
    }
}
// Extracts the left channel from interleaved 16-bit stereo byte data.
// pSrc: interleaved stereo bytes (L-lo, L-hi, R-lo, R-hi, ...), `size` bytes.
// pDts: receives the left-channel bytes (size / 2 bytes).
void stereoTurnMono(const unsigned char *pSrc, unsigned char *pDts, size_t size)
{
    size_t out = 0;
    for (size_t frame = 0; frame < size; frame += 4)
    {
        // Copy the two bytes of the left sample; the right sample is skipped.
        pDts[out++] = pSrc[frame];
        pDts[out++] = pSrc[frame + 1];
    }
}
// Attempts to recover an ALSA stream after a failed read/write/avail call
// (typically an xrun or a suspend).
// nRet:          the negative errno returned by the failing ALSA call.
// pDeviceHandle: the stream to recover.
// Returns 1 if an xrun (-EPIPE) was recovered, 0 on other successful
// recovery, and a negative value when recovery failed.
int32_t errorRecovery(int32_t nRet, snd_pcm_t *pDeviceHandle)
{
    int st = snd_pcm_state(pDeviceHandle);
    printf("Trying to recover from %s error: %s nRet:(%d) (state:%d)\n",
           ((snd_pcm_stream(pDeviceHandle) ==
             SND_PCM_STREAM_CAPTURE)
                ? "capture"
                : "playout"),
           snd_strerror(nRet),
           nRet,
           st);
    int res = snd_pcm_recover(pDeviceHandle, nRet, 1);
    if (0 == res)
    {
        printf("Recovery - snd_pcm_recover OK\n");
        if ((nRet == -EPIPE || nRet == -ESTRPIPE) &&
            snd_pcm_stream(pDeviceHandle) == SND_PCM_STREAM_CAPTURE)
        {
            // For capture streams we also have to repeat the explicit start()
            // to get data flowing again.
            // Fix: the inner result no longer shadows the `nRet` parameter,
            // which the final -EPIPE check below depends on.
            int startRet = snd_pcm_start(pDeviceHandle);
            if (startRet != 0)
            {
                printf("Recovery - snd_pcm_start error: %d\n", startRet);
                return -1;
            }
        }
        if ((nRet == -EPIPE || nRet == -ESTRPIPE) &&
            snd_pcm_stream(pDeviceHandle) == SND_PCM_STREAM_PLAYBACK)
        {
            // Fix: this is the playback branch (the original comment said
            // "capture"). Playback may need an explicit prepare() before it
            // can be restarted.
            snd_pcm_state_t state = snd_pcm_state(pDeviceHandle);
            if (state != SND_PCM_STATE_PREPARED)
            {
                snd_pcm_prepare(pDeviceHandle);
            }
            int startRet = snd_pcm_start(pDeviceHandle);
            if (startRet != 0)
            {
                printf("Recovery - snd_pcm_start error: %s\n", snd_strerror(startRet));
                return -1;
            }
        }
        return -EPIPE == nRet ? 1 : 0;
    }
    else
    {
        printf("Unrecoverable alsa stream error: %d\n", res);
    }
    return res;
}
// Playback thread: reads 10 ms mono frames from far.pcm, duplicates them to
// interleaved stereo and writes them to the ALSA playback device. Each mono
// chunk that gets played is also appended to ref.pcm so it can serve as the
// far-end reference for AEC. The source file is looped on EOF.
void *playback_thread(void *arg)
{
    printf("playback_thread\n");
    audio_data_t *data = (audio_data_t *)arg;
    // Raise this thread to near-maximum SCHED_FIFO priority so playout is
    // not starved by ordinary threads.
    const int policy = SCHED_FIFO;
    const int max_prio = sched_get_priority_max(policy);
    sched_param param;
    param.sched_priority = max_prio - 1;
    // Fix: the published listing was HTML-entity-garbled here ("¶m");
    // the intended argument is &param.
    pthread_setschedparam(pthread_self(), policy, &param);
    while (gRun)
    {
        int nRet;
        snd_pcm_sframes_t sndFrames;
        snd_pcm_sframes_t sndAvailFrames;
        sndAvailFrames = snd_pcm_avail_update(data->playback_handle);
        if (sndAvailFrames < 0)
        {
            printf("playout snd_pcm_avail_update error: %s\n", snd_strerror(sndAvailFrames));
            errorRecovery(sndAvailFrames, data->playback_handle);
            continue;
        }
        else if (sndAvailFrames == 0)
        {
            // No room in the device buffer yet; wait briefly for space.
            nRet = snd_pcm_wait(data->playback_handle, 2);
            continue;
        }
        if (data->m_nPlayoutFramesLeft <= 0)
        {
            // Current frame fully written: fetch the next 10 ms mono frame.
            size_t frames = fread(data->m_pRefBuffer, 2, PERIOD_SIZE, data->pcm_file);
            if (frames == 0 || frames != PERIOD_SIZE)
            { // end of file — loop the source from the beginning
                fseek(data->pcm_file, 0, SEEK_SET);
                continue;
            }
            monoTurnStereo((int16_t *)data->m_pRefBuffer, (int16_t *)data->m_pPlayoutBuffer, PERIOD_SIZE);
            data->m_nPlayoutFramesLeft = frames;
        }
        // Write at most as many frames as remain in the current buffer.
        if ((uint32_t)(sndAvailFrames) > data->m_nPlayoutFramesLeft)
        {
            sndAvailFrames = (uint32_t)data->m_nPlayoutFramesLeft;
        }
        // Offset into the stereo buffer: bytes already consumed so far.
        int size = snd_pcm_frames_to_bytes(data->playback_handle, data->m_nPlayoutFramesLeft);
        sndFrames = snd_pcm_writei(data->playback_handle, &data->m_pPlayoutBuffer[data->m_nPlayoutBufferSizeIn20MS - size], sndAvailFrames);
        if (sndFrames < 0)
        {
            printf("playout snd_pcm_writei error: %s\n", snd_strerror(sndFrames));
            data->m_nPlayoutFramesLeft = 0;
            errorRecovery(sndFrames, data->playback_handle);
            continue;
        }
        else
        {
            // Dump the mono samples that were just played as the AEC reference.
            fwrite(&data->m_pRefBuffer[PERIOD_SIZE * 2 - data->m_nPlayoutFramesLeft * 2], 1, sndFrames * 2, data->pcm_ref_file);
            data->m_nPlayoutFramesLeft -= sndFrames;
        }
    }
    return NULL;
}
// Capture thread: reads interleaved stereo frames from the ALSA capture
// device, downmixes to mono, and once a full 10 ms frame is accumulated
// runs the WebRTC 3A pipeline (reverse stream = far-end reference, forward
// stream = near end), dumping raw and processed audio to files.
void *capture_thread(void *arg)
{
    printf("capture_thread\n");
    audio_data_t *data = (audio_data_t *)arg;
    // Promote this thread to near-maximum SCHED_FIFO priority so capture
    // keeps up with the hardware.
    const int policy = SCHED_FIFO;
    const int max_prio = sched_get_priority_max(policy);
    sched_param param;
    param.sched_priority = max_prio - 1;
    // Fix: the published listing was HTML-entity-garbled here ("¶m");
    // the intended argument is &param.
    pthread_setschedparam(pthread_self(), policy, &param);
    while (gRun)
    {
        snd_pcm_sframes_t sndFrames;
        snd_pcm_sframes_t sndAvailFrames;
        int8_t buffer[data->m_nRecordBufferSizeIn20MS]; // scratch for one read
        sndAvailFrames = snd_pcm_avail_update(data->capture_handle);
        if (sndAvailFrames < 0)
        {
            printf("capture snd_pcm_avail_update error: %s\n", snd_strerror(sndAvailFrames));
            errorRecovery(sndAvailFrames, data->capture_handle);
            continue;
        }
        else if (sndAvailFrames == 0)
        {
            continue;
        }
        // Never read more than what is still missing from the current frame.
        if ((uint32_t)(sndAvailFrames) > data->m_nRecordingFramesLeft)
            sndAvailFrames = data->m_nRecordingFramesLeft;
        sndFrames = snd_pcm_readi(data->capture_handle, buffer, sndAvailFrames);
        if (sndFrames < 0)
        {
            printf("capture snd_pcm_readi error: %s\n", snd_strerror(sndFrames));
            errorRecovery(sndFrames, data->capture_handle);
            continue;
        }
        else if (sndFrames > 0)
        {
            // Append the new frames at the current fill position.
            int nLeftSize = snd_pcm_frames_to_bytes(data->capture_handle, data->m_nRecordingFramesLeft);
            int size = snd_pcm_frames_to_bytes(data->capture_handle, sndFrames);
            memcpy(&data->m_pRecordBuffer[data->m_nRecordBufferSizeIn20MS - nLeftSize], buffer, size);
            data->m_nRecordingFramesLeft -= sndFrames;
        }
        if (!data->m_nRecordingFramesLeft)
        {
            // A complete 10 ms stereo frame has been captured.
            data->m_nRecordingFramesLeft = PERIOD_SIZE;
            // Downmix to mono (left channel) and dump the raw near-end signal.
            stereoTurnMono((unsigned char *)data->m_pRecordBuffer, (unsigned char *)data->m_pNearBuffer, PERIOD_SIZE * 2 * 2);
            fwrite(data->m_pNearBuffer, 1, PERIOD_SIZE * 2, data->pcm_out_file);
            webrtc::StreamConfig stream_config(sample_rate_hz, num_channels);
            // Feed the far-end reference first, then process the near end.
            // NOTE(review): m_pRefBuffer is concurrently refilled by the
            // playback thread without locking — confirm this race is
            // acceptable for the demo.
            if (data->apm->ProcessReverseStream((int16_t *)data->m_pRefBuffer, stream_config, stream_config, (int16_t *)data->m_pRefBuffer) != webrtc::AudioProcessing::kNoError)
            {
                printf("ProcessReverseStream fail\n");
            }
            if (data->apm->ProcessStream((int16_t *)data->m_pNearBuffer, stream_config, stream_config, (int16_t *)data->m_p3ABuffer) != webrtc::AudioProcessing::kNoError)
            {
                printf("ProcessStream fail\n");
            }
            fwrite(data->m_p3ABuffer, 1, PERIOD_SIZE * 2, data->pcm_3a_file);
        }
    }
    return NULL;
}
// Opens `device` for `stream` in non-blocking mode and applies the fixed
// hardware parameters used by this demo (S16_LE, stereo, 16 kHz,
// 160-frame period, 4-period buffer).
// Returns 0 on success or a negative ALSA error code (device closed on
// parameter failure).
int setup_pcm_device(snd_pcm_t **handle, const char *device, snd_pcm_stream_t stream)
{
    snd_pcm_hw_params_t *params;
    int err;
    if ((err = snd_pcm_open(handle, device, stream, SND_PCM_NONBLOCK)) < 0)
    {
        printf("无法打开 PCM 设备 %s: %s\n", device, snd_strerror(err));
        return err;
    }
    // Fix: the published listing was HTML-entity-garbled here ("¶ms");
    // the intended argument is &params.
    snd_pcm_hw_params_alloca(&params);
    snd_pcm_hw_params_any(*handle, params);
    snd_pcm_hw_params_set_access(*handle, params, SND_PCM_ACCESS_RW_INTERLEAVED);
    snd_pcm_hw_params_set_format(*handle, params, SND_PCM_FORMAT_S16_LE);
    snd_pcm_hw_params_set_channels(*handle, params, CHANNELS);
    snd_pcm_hw_params_set_rate(*handle, params, SAMPLE_RATE, 0);
    snd_pcm_hw_params_set_period_size(*handle, params, PERIOD_SIZE, 0);
    snd_pcm_hw_params_set_buffer_size(*handle, params, PERIOD_SIZE * 4);
    if ((err = snd_pcm_hw_params(*handle, params)) < 0)
    {
        printf("设置 PCM 参数失败: %s\n", snd_strerror(err));
        snd_pcm_close(*handle);
        return err;
    }
    return 0;
}
// Entry point: opens the PCM source/dump files, configures the WebRTC audio
// processing module (NS + AGC + AEC + VAD), sets up the full-duplex ALSA
// devices, runs the playback/capture threads and stops on any key press.
// Returns 0 on success, -1 on any fatal setup error.
int main(int argc, char *argv[])
{
    audio_data_t data;
    if ((data.pcm_file = fopen("./far.pcm", "rb")) == NULL ||
        (data.pcm_3a_file = fopen("./3a.pcm", "wb")) == NULL ||
        (data.pcm_out_file = fopen("./near.pcm", "wb")) == NULL ||
        (data.pcm_ref_file = fopen("./ref.pcm", "wb")) == NULL)
    {
        printf("fail to open file\n");
        return -1;
    }
    // --- WebRTC 3A configuration ---
    data.apm = webrtc::AudioProcessingBuilder().Create();
    webrtc::AudioProcessing::Config config;
    // Noise suppression
    config.noise_suppression.enabled = true;
    config.noise_suppression.level = webrtc::AudioProcessing::Config::NoiseSuppression::Level::kHigh;
    // Automatic gain control
    config.gain_controller1.enabled = true;
    config.gain_controller1.mode = webrtc::AudioProcessing::Config::GainController1::Mode::kAdaptiveDigital;
    config.gain_controller1.target_level_dbfs = 3; // target output level
    // Echo cancellation
    config.echo_canceller.enabled = true;
    config.echo_canceller.mobile_mode = false; // set true on mobile-class hardware
    // Voice activity detection (optional)
    config.voice_detection.enabled = true;
    // Apply the configuration
    data.apm->ApplyConfig(config);
    if (setup_pcm_device(&data.playback_handle, "hw:0,0", SND_PCM_STREAM_PLAYBACK) < 0)
    {
        // Fix: close every file opened above, not just pcm_file.
        fclose(data.pcm_file);
        fclose(data.pcm_out_file);
        fclose(data.pcm_3a_file);
        fclose(data.pcm_ref_file);
        return -1;
    }
    if (setup_pcm_device(&data.capture_handle, "hw:0,0", SND_PCM_STREAM_CAPTURE) < 0)
    {
        snd_pcm_close(data.playback_handle);
        fclose(data.pcm_file);
        fclose(data.pcm_out_file);
        fclose(data.pcm_3a_file);
        fclose(data.pcm_ref_file);
        return -1;
    }
    pthread_t play_thread, record_thread;
    pthread_create(&play_thread, NULL, playback_thread, &data);
    int nRet = snd_pcm_prepare(data.playback_handle);
    if (nRet < 0)
    {
        printf("playout snd_pcm_prepare failed (%s)\n", snd_strerror(nRet));
    }
    pthread_create(&record_thread, NULL, capture_thread, &data);
    nRet = snd_pcm_prepare(data.capture_handle);
    if (nRet < 0)
    {
        printf("capture snd_pcm_prepare failed:%s \n", snd_strerror(nRet));
    }
    nRet = snd_pcm_start(data.capture_handle);
    if (nRet < 0)
    {
        printf("capture snd_pcm_start err:%s\n", snd_strerror(nRet));
        nRet = snd_pcm_start(data.capture_handle);
        if (nRet < 0)
        {
            printf("capture snd_pcm_start 2nd try err:%s\n", snd_strerror(nRet));
            // Fix: was `return false` — false converts to 0, i.e. main
            // would report SUCCESS on a fatal startup failure.
            return -1;
        }
    }
    getchar(); // run until any key is pressed
    gRun = false;
    pthread_join(play_thread, NULL);
    pthread_join(record_thread, NULL);
    snd_pcm_close(data.playback_handle);
    snd_pcm_close(data.capture_handle);
    fclose(data.pcm_file);
    fclose(data.pcm_out_file);
    fclose(data.pcm_3a_file);
    fclose(data.pcm_ref_file);
    printf("end................... \n");
    return 0;
}
2.注意项
- webrtc audio processing是基于10ms为一帧进行处理的
- 当前版本中可以设置3A配置等级,具体3A参数调参请参考我另一篇文章音频3A一——webrtc源码3A的启用方法和具体流程
- 对于资源消耗,如果没有对资源特别要求,或者其他特殊情况,尽量不要追求类似于WebRtcAec_Process,WebRtcAgc_Process这种方式单独使用3A的某一个模块,而是通过audio_processing进行处理
- 对于时延,如果有固定时延,应该对于AEC进行设置
3.效果展示
远端参考信号
近端采集信号
回声消除后的信号
总结
webrtc不愧是音视频领域的顶尖,值得我们学习的东西太多了。实际上demo里对于设备的读写,也是从webrtc中摘录出来的。
如果对您有所帮助,请帮忙点个赞吧!