一、概述
本节我们基于webrtc的非线性波束形成进行代码仿真,并对仿真结果进行展示和分析总结。更多资料和代码可以进入https://t.zsxq.com/qgmoN ,同时欢迎大家提出宝贵的建议,以共同探讨学习。
二、仿真代码
2.1 常量参数
% *@author : aflyingwolf
% *@date : 2025.4.15
% *@file : beamform_define_const_param.m
function beamform_param = beamform_define_const_param()
beamform_param.kFftSize = 256;
beamform_param.kNumFreqBins = (beamform_param.kFftSize / 2 + 1);
beamform_param.kMaskTimeSmoothAlpha = 0.2;
beamform_param.kMaskFrequencySmoothAlpha = 0.6;
beamform_param.kCompensationGain = 0.2;
beamform_param.kSpeedOfSoundMeterSeconds = 343.0;
beamform_param.kBalance = 0.90;
beamform_param.kBeamwidthConstant = 0.00002;
beamform_param.kInterfNum = 4;
beamform_param.kMaxDotProduct = 1e-6;
beamform_param.kLowMeanStartHz = 125;
beamform_param.kLowMeanEndHz = 400;
beamform_param.kSampleRate = 16000;
end
2.2 参数初始化
% *@author : aflyingwolf
% *@date : 2025.4.15
% *@file : beamform_reset.m
function beamform_inst = beamform_reset(beamform_inst, target_direction)
beamform_inst.target_angle_radians = target_direction(1);
kHighMeanStartHz = 2000;
kHighMeanEndHz = 3000;
beamform_inst.high_mean_start_bin = floor(kHighMeanStartHz * beamform_inst.param.kFftSize / ...
beamform_inst.sample_rate_hz + 0.5);
beamform_inst.high_mean_end_bin = floor(kHighMeanEndHz * beamform_inst.param.kFftSize / ...
beamform_inst.sample_rate_hz + 0.5);
%InitInterfAngles
target_direction_point = [cos(beamform_inst.target_angle_radians), ...
sin(beamform_inst.target_angle_radians), ...
0];
clockwise_interf_direction_point = [cos(beamform_inst.target_angle_radians - beamform_inst.away_radians), ...
sin(beamform_inst.target_angle_radians - beamform_inst.away_radians), ...
0];
if beamform_inst.array_sign > 0 && dot(beamform_inst.array_normal, target_direction_point) * ...
dot(beamform_inst.array_normal, clockwise_interf_direction_point) >= 0
beamform_inst.interf_angles_radians(1) = beamform_inst.target_angle_radians - beamform_inst.away_radians;
else
beamform_inst.interf_angles_radians(1) = beamform_inst.target_angle_radians - beamform_inst.away_radians + pi;
end
counterclock_interf_direction_point = [cos(beamform_inst.target_angle_radians + beamform_inst.away_radians), ...
sin(beamform_inst.target_angle_radians + beamform_inst.away_radians), ...
0];
if beamform_inst.array_sign > 0 && dot(beamform_inst.array_normal, target_direction_point) * ...
dot(beamform_inst.array_normal, counterclock_interf_direction_point) >= 0
beamform_inst.interf_angles_radians(2) = beamform_inst.target_angle_radians + beamform_inst.away_radians;
else
beamform_inst.interf_angles_radians(2) = beamform_inst.target_angle_radians + beamform_inst.away_radians - pi;
end
%wj add
clockwise_interf_direction_point = [cos(beamform_inst.target_angle_radians - beamform_inst.away_radians2), ...
sin(beamform_inst.target_angle_radians - beamform_inst.away_radians2), ...
0];
if beamform_inst.array_sign > 0 && dot(beamform_inst.array_normal, target_direction_point) * ...
dot(beamform_inst.array_normal, clockwise_interf_direction_point) >= 0
beamform_inst.interf_angles_radians(3) = beamform_inst.target_angle_radians - beamform_inst.away_radians2;
else
beamform_inst.interf_angles_radians(3) = beamform_inst.target_angle_radians - beamform_inst.away_radians2 + pi;
end
counterclock_interf_direction_point = [cos(beamform_inst.target_angle_radians + beamform_inst.away_radians2), ...
sin(beamform_inst.target_angle_radians + beamform_inst.away_radians2), ...
0];
if beamform_inst.array_sign > 0 && dot(beamform_inst.array_normal, target_direction_point) * ...
dot(beamform_inst.array_normal, counterclock_interf_direction_point) >= 0
beamform_inst.interf_angles_radians(4) = beamform_inst.target_angle_radians + beamform_inst.away_radians2;
else
beamform_inst.interf_angles_radians(4) = beamform_inst.target_angle_radians + beamform_inst.away_radians2 - pi;
end
%InitDelaySumMasks
for t = 1:1:beamform_inst.param.kNumFreqBins
freq_in_hertz = (t-1) / beamform_inst.param.kFftSize * beamform_inst.sample_rate_hz;
for k = 1:1:beamform_inst.num_input_channels
distance = cos(beamform_inst.target_angle_radians) * beamform_inst.array_geometry(k,1) + ...
sin(beamform_inst.target_angle_radians) * beamform_inst.array_geometry(k,2);
phase_shift = 2.0 * pi * distance * freq_in_hertz / beamform_inst.param.kSpeedOfSoundMeterSeconds;
beamform_inst.delay_sum_masks(t,k) = complex(cos(phase_shift), sin(phase_shift));
end
end
beamform_inst.delay_sum_masks = beamform_inst.delay_sum_masks/sqrt(beamform_inst.num_input_channels);
%InitTargetCovMats
for t = 1:1:beamform_inst.param.kNumFreqBins
beamform_inst.target_cov_mats(t,:,:) = beamform_inst.delay_sum_masks(t,:).' * conj(beamform_inst.delay_sum_masks(t,:));
xx = trace(squeeze(beamform_inst.target_cov_mats(t,:,:)));
beamform_inst.target_cov_mats(t,:,:) = beamform_inst.target_cov_mats(t,:,:) / xx;
end
%InitInterfCovMats
for t = 1:1:beamform_inst.param.kNumFreqBins
for k = 1:1:beamform_inst.param.kInterfNum
interf_cov_vector = complex(zeros(1, beamform_inst.num_input_channels));
freq_in_hertz = (t-1) / beamform_inst.param.kFftSize * beamform_inst.sample_rate_hz;
for s = 1:1:beamform_inst.num_input_channels
distance = cos(beamform_inst.interf_angles_radians(k)) * beamform_inst.array_geometry(s,1) + ...
sin(beamform_inst.interf_angles_radians(k)) * beamform_inst.array_geometry(s,2);
phase_shift = 2.0 * pi * distance * freq_in_hertz / beamform_inst.param.kSpeedOfSoundMeterSeconds;
interf_cov_vector(s) = complex(cos(phase_shift), sin(phase_shift));
end
interf_cov_vector_transposed = interf_cov_vector.';
beamform_inst.interf_cov_mats(t, k,:,:) = interf_cov_vector_transposed * conj(interf_cov_vector);
xx = trace(squeeze(beamform_inst.interf_cov_mats(t, k,:,:)));
beamform_inst.interf_cov_mats(t, k,:,:) = beamform_inst.interf_cov_mats(t, k,:,:) / xx;
beamform_inst.interf_cov_mats(t, k,:,:) = beamform_inst.interf_cov_mats(t, k,:,:) * beamform_inst.param.kBalance;
beamform_inst.interf_cov_mats(t, k,:,:) = squeeze(beamform_inst.interf_cov_mats(t, k,:,:)) + squeeze(beamform_inst.uniform_cov_mat(t,:,:));
end
end
%NormalizeCovMats
for t = 1:1:beamform_inst.param.kNumFreqBins
xx = conj(beamform_inst.delay_sum_masks(t,:)) * ...
squeeze(beamform_inst.target_cov_mats(t,:,:)) * beamform_inst.delay_sum_masks(t,:).';
xx = real(xx);
xx = max(xx, 0);
beamform_inst.rxiws(t) = xx;
for k = 1:1:beamform_inst.param.kInterfNum
xx = conj(beamform_inst.delay_sum_masks(t,:)) * ...
squeeze(beamform_inst.interf_cov_mats(t,k,:,:)) * beamform_inst.delay_sum_masks(t,:).';
xx = real(xx);
xx = max(xx, 0);
beamform_inst.rpsiws(t,k) = xx;
end
end
end
2.3 初始化句柄
% *@author : aflyingwolf
% *@date : 2025.4.15
% *@file : beamform_instance.m
function beamform_inst = beamform_instance(beamform_param, sample_rate_hz, array_geometry, mic_num, target_direction)
beamform_inst.param = beamform_param;
beamform_inst.sample_rate_hz = sample_rate_hz;
beamform_inst.num_input_channels = mic_num;
beamform_inst.array_geometry = array_geometry - mean(array_geometry, 1);
micArray = mic_array();
[beamform_inst.array_sign, beamform_inst.array_normal] = micArray.direction(beamform_inst.array_geometry);
beamform_inst.target_angle_radians = target_direction(1);
beamform_inst.away_radians = pi/4.0;
beamform_inst.away_radians2 = pi/2.0;
beamform_inst.high_pass_postfilter_mask = 1.0;
beamform_inst.time_smooth_mask = ones(1,beamform_param.kNumFreqBins);
beamform_inst.final_mask = ones(1,beamform_param.kNumFreqBins);
beamform_inst.wave_numbers = ones(1,beamform_param.kNumFreqBins);
beamform_inst.mask_thresholds = ones(1,beamform_param.kNumFreqBins);
beamform_inst.interf_angles_radians = ones(1, beamform_param.kInterfNum);
for t = 1:1:beamform_param.kNumFreqBins
freq_hz = (t-1) / beamform_param.kFftSize * beamform_inst.sample_rate_hz;
beamform_inst.wave_numbers(t) = 2 * pi * freq_hz / beamform_param.kSpeedOfSoundMeterSeconds;
beamform_inst.mask_thresholds(t) = beamform_inst.num_input_channels * beamform_inst.num_input_channels * ...
beamform_param.kBeamwidthConstant * beamform_inst.wave_numbers(t) *...
beamform_inst.wave_numbers(t);
end
beamform_inst.delay_sum_masks = complex(zeros(beamform_param.kNumFreqBins, beamform_inst.num_input_channels));
beamform_inst.target_cov_mats = complex(zeros(beamform_param.kNumFreqBins, ...
beamform_inst.num_input_channels, beamform_inst.num_input_channels));
beamform_inst.uniform_cov_mat = complex(zeros(beamform_param.kNumFreqBins, ...
beamform_inst.num_input_channels, beamform_inst.num_input_channels));
beamform_inst.interf_cov_mats = complex(zeros(beamform_param.kNumFreqBins, ...
beamform_param.kInterfNum, ...
beamform_inst.num_input_channels, beamform_inst.num_input_channels));
beamform_inst.eig_m = complex(zeros(1, beamform_inst.num_input_channels));
beamform_inst.low_mean_start_bin = floor(beamform_param.kLowMeanStartHz * beamform_param.kFftSize / ...
beamform_inst.sample_rate_hz + 0.5);
beamform_inst.low_mean_end_bin = floor(beamform_param.kLowMeanEndHz * beamform_param.kFftSize / ...
beamform_inst.sample_rate_hz + 0.5);
array_distance = micArray.distance(beamform_inst.array_geometry);
for t = 1:1:beamform_param.kNumFreqBins
% diffuse_cov = zeros(beamform_inst.num_input_channels, beamform_inst.num_input_channels);
if beamform_inst.wave_numbers(t) > 0
diffuse_cov = besselj(0, beamform_inst.wave_numbers(t)*array_distance);
else
diffuse_cov = eye(beamform_inst.num_input_channels);
end
diffuse_cov = diffuse_cov/6.0;
beamform_inst.uniform_cov_mat(t,:,:) = complex(diffuse_cov) * (1 - beamform_param.kBalance);
end
target_direction(2) = 0.0;
target_direction(3) = 1.0;
beamform_inst = beamform_reset(beamform_inst, target_direction);
end
2.4 逻辑处理
% *@author : aflyingwolf
% *@date : 2025.4.15
% *@file : beamform_audio_process.m
function output = beamform_audio_process(wav_data, array_geometry, sample_rate_hz, target_direction)
beamform_param = beamform_define_const_param();
%start bf
beamform_inst = beamform_instance(beamform_param, sample_rate_hz, array_geometry, size(array_geometry, 1), target_direction);
%end bf
% %start xh
% beamform_inst = cell(1, 360);
% for k=1:20:360
% target_direction = k * pi / 180;
% kk = beamform_instance(beamform_param, sample_rate_hz, array_geometry, size(array_geometry, 1), target_direction);
% beamform_inst{k} = kk;
% end
% P = zeros(1,length(1:20:360));
%
% %end xh
num_channel = size(wav_data, 2);
num_points = size(wav_data, 1);
num_frames = fix((num_points-beamform_param.kFftSize)/(beamform_param.kFftSize/2)) + 1;
num_points = num_frames * beamform_param.kFftSize/2 + beamform_param.kFftSize/2;
window = reshape(kbdwin(beamform_param.kFftSize, 1.5),1,[]);
spec = zeros(num_frames, beamform_param.kFftSize, num_channel);
output = zeros(1, num_points);
for ch = 1:num_channel
frames = enframe(wav_data(:,ch), window, (beamform_param.kFftSize/2));
frames = fft(frames, beamform_param.kFftSize, 2);
spec(:,:,ch) = frames;
end
% %start doa
% c = 340;
% Nele = size(wav_data, 2);
% omega = zeros(beamform_param.kNumFreqBins,1);
% H = zeros(360,beamform_param.kNumFreqBins,Nele);
% alpha = 0.0;
% r = 0.0463;
%
% theta = 90*pi/180; %固定一个俯仰角
% gamma = [0 300 240 180 120 60]*pi/180;
% step = 5;
%
% starts = 1;
% ends = beamform_param.kFftSize/2+1;
% for f = 1:step:360
% tao = r*sin(theta)*cos((f)/180*pi-gamma)/c; %方位角 0 < angle <360
% for k = starts:1:ends
% omega(k) = 2*pi*(k-1)*sample_rate_hz/beamform_inst.param.kFftSize;
% % steering vector
% H(f,k,:) = exp(-1j*omega(k)*tao);
% end
% end
%
% P = zeros(1,length(1:step:360));
% %end doa
h = waitbar(0,'1','name','Simulation');
for f = 1:1:num_frames
s=sprintf('Simulation in process:%d',ceil(f/num_frames*100));
waitbar(f/num_frames,h,[s '%']);
in_frame_fft = squeeze(spec(f,:,:)).';
in_frame_fft = in_frame_fft.';
in_frame_fft = in_frame_fft(1:beamform_param.kNumFreqBins,:);
% %start doa
% t = 0;
% for f_doa = 1:step:360
% filter = squeeze(H(f_doa,:,:));
% x_fft=bsxfun(@times, in_frame_fft,filter);
% % phase transformed
% x_fft = bsxfun(@rdivide, x_fft,abs(in_frame_fft));
% yf = sum(x_fft,2);
% t = t+1;
% %beamformed output energy
% P(t) = alpha * P(t) + (1-alpha) * yf'*yf;
% end
% index = 0;
% value1 = -1;
% P = real(P);
% for k = 1:step:length(1:step:360)
% if (P(k) - value1) > 1e-6
% value1 = P(k);
% index = k;
% end
% end
% ang = (index)*step;
% ang1 = ang;
%
% target_direction = ang1 * pi / 180;
% beamform_inst = beamform_reset(beamform_inst, target_direction);
%
% %end doa
% %start xh
% t=1;
% index = 0;
% value1 = -1;
% for k=1:20:360
% kk = beamform_inst{k};
% [out_frame_fft2, kk] = beamform_process_block(kk, in_frame_fft);
% beamform_inst{k} = kk;
%
% PP = out_frame_fft2 * out_frame_fft2';
% t = t + 1;
% if (PP - value1) > eps
% value1 = PP;
% out_frame_fft = out_frame_fft2;
% end
% end
%
% %end xh
%start bf
[out_frame_fft, beamform_inst] = beamform_process_block(beamform_inst, in_frame_fft);
%endbf
% out_frame_fft = in_frame_fft(:,3).';
out_frame_fft = [out_frame_fft, conj(fliplr(out_frame_fft(2:(beamform_param.kNumFreqBins-1))))];
out_frame_pcm = ifft(out_frame_fft);
out_frame_pcm = out_frame_pcm .* window;
f_p = (f-1)*(beamform_param.kFftSize/2)+1;
output(f_p:f_p+beamform_param.kFftSize-1) = output(f_p:f_p+beamform_param.kFftSize-1) + out_frame_pcm;
end
output = real(output);
delete(h);
end
2.5 核心代码
% *@author : aflyingwolf
% *@date : 2025.4.15
% *@file : beamform_process_block.m
function [output,beamform_inst] = beamform_process_block(beamform_inst, input)
for k = beamform_inst.low_mean_start_bin+1:1:beamform_inst.high_mean_end_bin
beamform_inst.eig_m = input(k,:);
eig_m_norm_factor = sqrt(beamform_inst.eig_m * beamform_inst.eig_m');
if abs(eig_m_norm_factor) > eps
beamform_inst.eig_m = beamform_inst.eig_m / eig_m_norm_factor;
end
rxim = conj(beamform_inst.eig_m) * squeeze(beamform_inst.target_cov_mats(k,:,:)) * beamform_inst.eig_m.';
rxim = real(rxim);
rxim = max(rxim, 0);
ratio_rxiw_rxim = 0;
if rxim > 0.0
ratio_rxiw_rxim = beamform_inst.rxiws(k) / rxim;
end
rmw = abs(conj(beamform_inst.delay_sum_masks(k,:)) * beamform_inst.eig_m.');
rmw = rmw * rmw;
rmw_r = real(rmw);
beamform_inst.new_mask(k) = 1.0;
for t = 1:1:beamform_inst.param.kInterfNum
kMaskMinimum = 0.01;
rpsim = conj(beamform_inst.eig_m) * squeeze(beamform_inst.interf_cov_mats(k,t,:,:)) * beamform_inst.eig_m.';
rpsim = real(rpsim);
rpsim = max(rpsim, 0);
ratio = 0.0;
if rpsim > 0
ratio = beamform_inst.rpsiws(k,t) / rpsim;
end
numrator = rmw_r - ratio;
denominator = ratio_rxiw_rxim - ratio;
mask = 1.0;
if denominator > beamform_inst.mask_thresholds(k)
lambda = numrator / denominator;
mask = lambda * ratio_rxiw_rxim / rmw_r;
if mask < kMaskMinimum
mask = kMaskMinimum;
end
end
beamform_inst.new_mask(k) = beamform_inst.new_mask(k) * mask;
end
end
for k = beamform_inst.low_mean_start_bin+1:1:beamform_inst.high_mean_end_bin
beamform_inst.time_smooth_mask(k) = beamform_inst.param.kMaskTimeSmoothAlpha * beamform_inst.new_mask(k) + ...
(1 - beamform_inst.param.kMaskTimeSmoothAlpha) * beamform_inst.time_smooth_mask(k);
end
%ApplyLowFrequencyCorrection
low_frequency_mask = mean(beamform_inst.time_smooth_mask(beamform_inst.low_mean_start_bin+1:...
beamform_inst.low_mean_end_bin));
beamform_inst.time_smooth_mask(1:beamform_inst.low_mean_start_bin) = low_frequency_mask;
%ApplyHighFrequencyCorrection
high_pass_postfilter_mask = mean(beamform_inst.time_smooth_mask(beamform_inst.high_mean_start_bin:...
beamform_inst.high_mean_end_bin));
beamform_inst.time_smooth_mask(beamform_inst.high_mean_end_bin+1:beamform_inst.param.kNumFreqBins) = high_pass_postfilter_mask;
%ApplyMaskFrequencySmoothing
beamform_inst.final_mask = beamform_inst.time_smooth_mask;
for t = beamform_inst.low_mean_start_bin+1:1:beamform_inst.high_mean_end_bin
beamform_inst.final_mask(t) = beamform_inst.param.kMaskFrequencySmoothAlpha * beamform_inst.final_mask(t) + ...
(1 - beamform_inst.param.kMaskFrequencySmoothAlpha) * beamform_inst.final_mask(t-1);
end
for t = beamform_inst.high_mean_end_bin + 1:-1:2
beamform_inst.final_mask_(t-1) = beamform_inst.param.kMaskFrequencySmoothAlpha * beamform_inst.final_mask(t-1) + ...
(1 - beamform_inst.param.kMaskFrequencySmoothAlpha) * beamform_inst.final_mask(t);
end
%ApplyMasks
for t = 1:1:beamform_inst.param.kNumFreqBins
output(t) = input(t,:) * beamform_inst.delay_sum_masks(t,:)';
% output(t) = input(t,1);
output(t) = output(t) * beamform_inst.final_mask(t) * beamform_inst.param.kCompensationGain;
end
for t = 1:1:4
output(t) = complex(0);
end
end
2.6 导向矢量计算
% *@author : aflyingwolf
% *@date : 2025.4.15
% *@file : mic_array.m
function A = mic_array()
A.direction=@mic_direction;
A.distance=@mic_distance;
A.steer=@mic_steer_vec;
end
% 两线垂直 == 点积和=0
% 两线平行 == 叉积长度为0
function [sign , array_normal_direction] = mic_direction(array)
num_mic = size(array,1);
assert(num_mic>=2);
sign = -1; % 线性1, 平面2
is_linear_array = true;
first_pair_direction = array(2,:) - array(1,:);
for i=3:num_mic
direction = array(i,:) - array(i-1,:);
if abs(norm(cross(first_pair_direction, direction))) > eps
is_linear_array = false;
break;
end
end
if is_linear_array
sign = 1;
array_normal_direction = [first_pair_direction(2), -first_pair_direction(1), 0];
return ;
end
assert(num_mic>=3);
second_pair_direction = array(3,:) - array(2,:);
normal_direction = cross(first_pair_direction, second_pair_direction);
is_plane_array = true;
for i=4:num_mic
pair_direction = array(i,:) - array(i-1,:);
if dot(normal_direction,pair_direction) > eps
is_plane_array = false;
break;
end
end
if is_plane_array
sign = 2;
array_normal_direction = normal_direction;
return ;
end
end
function array_distance = mic_distance(array)
num_mic = size(array, 1);
array_distance = zeros(num_mic, num_mic);
for t=1:num_mic
for s=t+1:num_mic
d = norm(array(t,:)-array(s,:));
array_distance(t,s) = d;
array_distance(s,t) = d;
end
end
end
function steer = mic_steer_vec(array, angle, f, c)
angle_radians = angle/180*pi;
num_mic = size(array,1);
dis = zeros(1, num_mic);
fz = reshape(f,[],1);
for i = 1:num_mic
dis(i) = cos(angle_radians)*array(i,1) + sin(angle_radians)*array(i,2);
end
phase = 2*pi*fz*dis/c;
steer = complex(cos(phase), sin(phase));
end
2.7 测试demo
% *@author : aflyingwolf
% *@date : 2025.4.15
% *@file : test_beamform.m
input_wav_file = 'simulate_role1_0_t60_0.2_role2_180_t60_0.2.wav';
%0.0463 0.0 0.0 0.02315 -0.0401 0.0 -0.02315 -0.0401 0.0 -0.0463 0.0 0.0 -0.02315 0.0401 0.0 0.02315 0.0401 0.0
array_geometry = [0.0463, 0.0, 0.0; 0.02315, -0.0401, 0.0;
-0.02315, -0.0401, 0.0; -0.0463, 0.0, 0.0;
-0.02315, 0.0401, 0.0; 0.02315, 0.0401, 0.0];
target_direction = 0 * pi / 180;
[wav_data, sample_rate_hz] = audioread(input_wav_file);
output = beamform_audio_process(wav_data, array_geometry, sample_rate_hz, [target_direction, 0, 1]);
% output = beamform_audio_process_fromfile(wav_data, array_geometry, sample_rate_hz, [target_direction, 0, 1]);
% output = wav_data;
output_filename = input_wav_file(1:(length(input_wav_file)-4));
output_filename = output_filename + "_webrtc_t0.wav";
audiowrite(output_filename, output, sample_rate_hz);
三、结果展示
3.1 0度方向的波束
3.2 180度方向的波束
四、总结
本节我们实现了基于webrtc的非线性波束形成算法仿真,从结果上看,降噪效果较好,可以很好的去除干扰,但是引入了非线性,对后面去除平稳噪声不利。