本文最早发表于电子发烧友论坛:【新提醒】【正点原子i.MX93开发板试用连载体验】基于深度学习的语音本地控制 - 正点原子学习小组 - 电子技术论坛 - 广受欢迎的专业电子论坛! (elecfans.com)
接下来就是要尝试训练中文提示词。首先要进行语料采集,这是一件比较耗费人力的事情,通常大公司会有专人进行语料收集,我只好亲自来做。这里参考了AliOS Things里面提供的一个录音工具,方便快速录音。我对这个工具做了一点修改:原来的代码只能在Linux下运行,现在改成在Windows下也能运行。
import pyaudio
import wave
import random
import time
import os
from IPython import display
#from pydub import AudioSegment
#from pydub.playback import play
#from playsound import playsound
# --- Audio stream parameters -------------------------------------------
RATE = 16000              # sampling rate passed to the streams and WAV header
CHANNELS = 1              # number of channels passed to the streams and WAV header
FORMAT = pyaudio.paInt16  # PortAudio sample format used for recording
# Presumably the sample width in bytes (matches paInt16) -- TODO confirm.
# NOTE(review): record_wav uses this value as the divisor of its read count.
SAMPLEWIDTH = 2
# Number of frames requested per stream read and per playback write.
CHUNK = 2
RECORD_SECONDS = 1        # only referenced from commented-out code in this file

# --- Output file naming --------------------------------------------------
FILE_FORMAT = '.wav'      # extension appended to every recording
RECODER_NAME = 'lk'       # string mixed into the generated file-name hash
#play stream
def play_wav(name, pyaudio, chunk=None):
    """Play a WAV file through an output stream of the given PyAudio instance.

    Args:
        name: Path of the WAV file to play.
        pyaudio: An initialised ``PyAudio`` instance (note: this parameter
            shadows the ``pyaudio`` module inside the function). It is not
            terminated here; the caller owns it.
        chunk: Number of frames to read and write per iteration. Defaults to
            the module-level ``CHUNK`` when omitted.
    """
    frames_per_write = CHUNK if chunk is None else chunk

    # The context manager closes the WAV file even if playback fails.
    with wave.open(name, "rb") as wav_file:
        # Use the instance that was passed in rather than a module global,
        # so the function also works when no global PyAudio object exists.
        play_stream = pyaudio.open(
            format=pyaudio.get_format_from_width(wav_file.getsampwidth()),
            channels=wav_file.getnchannels(),
            rate=wav_file.getframerate(),
            output=True,
        )
        try:
            data = wav_file.readframes(frames_per_write)
            while data:  # readframes returns b'' once the file is exhausted
                play_stream.write(data)
                data = wav_file.readframes(frames_per_write)
        finally:
            # Always release the audio device, even on a write error.
            play_stream.stop_stream()
            play_stream.close()
def save_wav(name, frames):
    """Write recorded audio chunks to a WAV file.

    The header is filled from the module-level ``CHANNELS``, ``FORMAT`` and
    ``RATE`` constants.

    Args:
        name: Destination path of the WAV file.
        frames: Iterable of ``bytes`` chunks, e.g. the list returned by
            ``record_wav``.
    """
    # The context manager closes the file even if a write fails.
    with wave.open(name, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        # Module-level helper: avoids depending on a global PyAudio instance
        # that only exists when the file is run as a script.
        wf.setsampwidth(pyaudio.get_sample_size(FORMAT))
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))
def record_wav(duration, audio=None):
    """Record audio from the default input device and return the raw chunks.

    Args:
        duration: Recording length in seconds.
        audio: Optional ``PyAudio`` instance to open the input stream on.
            When omitted, the module-level ``p`` created by the script entry
            point is used.

    Returns:
        A list of ``bytes`` objects, one per stream read, in capture order.
    """
    if audio is None:
        audio = p

    time.sleep(0.2)  # short pause before the prompt is shown
    print("开始录音,请说话......")

    stream = audio.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK)
    try:
        # Every read requests CHUNK frames, so the number of reads must be
        # derived from CHUNK (not from the sample width) for the recording to
        # last ``duration`` seconds regardless of the chunk size.
        reads = int(RATE * duration / CHUNK)
        frames = [
            # Overflows are ignored rather than raised (best-effort capture).
            stream.read(CHUNK, exception_on_overflow=False)
            for _ in range(reads)
        ]
    finally:
        # Release the input device even if a read fails.
        stream.stop_stream()
        stream.close()

    print("录音结束!")
    return frames
# main function
if __name__ == '__main__':
    # Interactive collection loop: record one clip per iteration, play it
    # back, and let the user keep or discard it.
    # ``p`` must stay a module-level name: other functions in this file may
    # look it up as a global.
    p = pyaudio.PyAudio()
    try:
        count = 0  # number of clips kept so far; also used in the file name
        for _ in range(250):  # at most 250 attempts, kept or discarded
            input('请按回车键开始录制!\n')
            print("开始第%d录制!" % count)

            # File name: <hex token>_nohash_<index>.wav, where the token is
            # derived from the recorder name plus a random number.
            token = abs(hash(RECODER_NAME + str(random.random()))) % 1000000000
            hash_name = format(token, 'x') + '_nohash_' + str(count) + FILE_FORMAT

            rframes = record_wav(1)  # record 1 second
            save_wav(hash_name, rframes)

            print("录音回放开始!\n")
            play_wav(hash_name, p)
            print("录音回放结束!\n")

            value = input("按‘回车’保存,放弃本条请按‘其他’键并回车!\n")
            if value == '':
                count += 1
                print("保存录音成功!")
            else:
                os.remove(hash_name)
                print("已删除本条录音!")
    finally:
        # Release PortAudio even when the session is aborted (e.g. Ctrl-C)
        # or a recording/playback call raises.
        p.terminate()