在处理一些用户上传的音频的时候,往往由于用户的设备不同,文件格式难以统一,尤其是涉及到算法模型相关的场景,更是令人头疼,这里提供两种思路解决这个问题。
不借助三方库
这种采用的是javax.sound.sampled下的包来实现,缺点是需要预先知道目标的采样率等信息。
工具类
import com.example.phoneme.constant.WavConstant;
import lombok.extern.slf4j.Slf4j;
import javax.sound.sampled.*;
import java.io.*;
import java.util.Arrays;
@Slf4j
public class WavUtils {
public static byte[] toPCM(byte[] src) {
if (src.length > 44) {
return Arrays.copyOfRange(src, 44, src.length);
}
return new byte[0];
}
public static byte[] convertTo16kHzMono16bitPCM(byte[] audioData,int sampleRate,int sampleSizeBits,int channels,boolean signed,boolean bigEndian) {
try{
// 创建输入字节数组流
ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(audioData);
// 创建目标音频格式
AudioFormat targetFormat = new AudioFormat(WavConstant.SAMPLE_RATE, WavConstant.BIT_DEPTH, WavConstant.CHANNELS, WavConstant.SIGNED, WavConstant.BIG_ENDIAN);
// 创建目标音频输入流
AudioInputStream audioInputStream = new AudioInputStream(byteArrayInputStream, new AudioFormat(sampleRate, sampleSizeBits, channels, signed, bigEndian), audioData.length / 4);
// 转换音频格式
AudioInputStream convertedAudioInputStream = AudioSystem.getAudioInputStream(targetFormat, audioInputStream);
// 将转换后的音频数据写入字节数组
ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
AudioSystem.write(convertedAudioInputStream, AudioFileFormat.Type.WAVE, byteArrayOutputStream);
// 将字节数组返回
byte[] convertedAudioData = byteArrayOutputStream.toByteArray();
// 关闭流
audioInputStream.close();
convertedAudioInputStream.close();
byteArrayOutputStream.close();
return convertedAudioData;
} catch (IOException e) {
e.printStackTrace();
return null;
}
}
public static boolean checkVideo(byte[] fileBytes) {
try (InputStream inputStream = new ByteArrayInputStream(fileBytes)) {
// 使用AudioSystem类获取音频文件的格式
AudioFileFormat audioFileFormat = AudioSystem.getAudioFileFormat(inputStream);
// 判断文件格式是否为WAV
if (audioFileFormat.getType() == AudioFileFormat.Type.WAVE) {
// 获取WAV文件的属性信息
AudioFormat audioFormat = audioFileFormat.getFormat();
double sampleRate = audioFormat.getSampleRate();
int channels = audioFormat.getChannels();
int bitDepth = audioFormat.getSampleSizeInBits();
log.info("上传的音频格式为:Sample Rate:{},Channels:{},Bit Depth:{}", sampleRate, channels, bitDepth);
if (sampleRate == WavConstant.SAMPLE_RATE
&& channels == WavConstant.CHANNELS
&& bitDepth == WavConstant.BIT_DEPTH) {
log.info("校验通过");
return true;
}
} else {
log.info("不是WAV文件");
return false;
}
} catch (UnsupportedAudioFileException | IOException e) {
log.info("不是WAV文件");
return false;
}
log.info("不是WAV文件");
return false;
}
}
常量类
/**
 * Target WAV format constants: 16 kHz, 16-bit, signed, little-endian, mono PCM.
 */
public interface WavConstant {
    // The "f" suffix is required: a bare 16000.0 is a double literal and
    // does not compile when assigned to a float constant.
    float SAMPLE_RATE = 16000.0f;
    int CHANNELS = 1;
    int BIT_DEPTH = 16;
    boolean SIGNED = true;
    boolean BIG_ENDIAN = false;
}
测试类
/**
 * Reads a WAV file, validates it, converts it to 16 kHz/16-bit/mono, and
 * validates the result again.
 */
@Test
public void testTransform() {
    try (FileInputStream fis = new FileInputStream("/path/to/file")) {
        ByteArrayOutputStream stream = new ByteArrayOutputStream();
        byte[] buffer = new byte[1024];
        int bytesRead;
        while ((bytesRead = fis.read(buffer)) != -1) {
            // Only the bytes actually read belong to the file; the original
            // stream.write(buffer) appended the whole 1024-byte buffer each
            // iteration, corrupting the data with stale trailing bytes.
            stream.write(buffer, 0, bytesRead);
        }
        byte[] streamByteArray = stream.toByteArray();
        boolean b1 = checkVideo(streamByteArray);
        log.info("文件是否符合要求: {}", b1);
        // Log before converting (the original logged "开始转换" after the
        // conversion had already run).
        log.info("开始转换");
        byte[] bytes = WavUtils.convertTo16kHzMono16bitPCM(streamByteArray, 16000, 16, 2, true, false);
        if (bytes == null) {
            // convertTo16kHzMono16bitPCM returns null on failure; guard the
            // bytes.length access below against an NPE.
            log.info("转换失败");
            return;
        }
        boolean b2 = checkVideo(bytes);
        log.info("文件的字节长度是: {}", bytes.length);
        log.info("文件是否符合要求: {}", b2);
    } catch (IOException ioe) {
        log.error("读取或转换音频失败", ioe);
    }
}
运行结果
借助三方库
比较通用的解决方案是ffmpeg,这里提供一种方法,借助命令行实现,缺点也很明显,就是命令行对于代码来说可控程度不高,依赖环境,不好迁移,需要保存中间文件,但是优点是处理音频更灵活
工具类
import lombok.extern.slf4j.Slf4j;
import java.io.IOException;
@Slf4j
public class FfmpegUtil {
public static void toWav(String flvPath, int sampleRate, int channel, int bitrate, String fileType, String targetFilename) {
try {
String command = "ffmpeg -i " + flvPath + " -vn " + " -ar " + sampleRate + " -ac " + channel + " -ab " + bitrate + "k" + " -f " + fileType + " " + targetFilename;
Runtime.getRuntime().exec(new String[]{"sh", "-c", command});
} catch (IOException e) {
e.printStackTrace();
}
}
}
测试类这里就略去了
总结
这两种方法各有优劣,实际中要酌情选择。
调用命令行有时候比较依赖环境,不好迁移,也可以请C++的工程师将其封装成JNI的形式供Java调用。