一、技术栈
python 3.10.6
vosk:需下载对应的模型(vosk-model-cn-0.22)。模型下载较慢的同学请看文末的资源链接。
pyaudio
keyboard
二、实现功能
本地化实现麦克风语音录入,实时生成文字,并保存至本地文档。
三、实现代码
import json  # parses the recognizer's JSON results
import os
import sys  # used to exit the program
import threading

import keyboard
import pyaudio  # real-time audio input
from vosk import Model, KaldiRecognizer
# --- Shared state, touched by the recording thread and the key handler ---
# Guards reads and writes of the `recording` flag.
lock = threading.Lock()
recording = False
# Hearing any of these words in a recognized phrase ends the session.
END_KEYWORDS = ["结束", "停止"]
# Accumulates everything recognized so far.
recognized_text = ""

# --- Speech model ---
# Root directory of the Vosk model; write the path with forward slashes
# or doubled backslashes.
model_path = "D:/2.DevEnv/vosk/vosk-model-cn-0.22"
model = Model(model_path)
# The recognizer is created for 16000 Hz, the same rate the input stream
# below is opened with (the original note says this matches the model).
rec = KaldiRecognizer(model, 16000)

# --- Microphone input ---
p = pyaudio.PyAudio()
stream = p.open(
    format=pyaudio.paInt16,
    channels=1,
    rate=16000,
    input=True,
    frames_per_buffer=4000,
)
stream.start_stream()
def start_recognition():
    """Run the recognition loop until recording is switched off.

    Sets the shared ``recording`` flag, then repeatedly reads 4000-frame
    chunks from the module-level ``stream`` and feeds them to ``rec``.
    Every completed, non-empty phrase is printed and appended to
    ``recognized_text``. When a phrase contains one of ``END_KEYWORDS``,
    ``stop_recognition()`` is called.

    Errors raised while recording is still active are reported on stdout;
    nothing is re-raised.
    """
    global recording, recognized_text
    with lock:
        recording = True
    print("开始录音,请说话...")
    try:
        while True:
            with lock:
                if not recording:
                    break
            # Recognition can take longer than one chunk of audio lasts; do not
            # let a dropped input buffer raise and kill the whole session.
            data = stream.read(4000, exception_on_overflow=False)
            if not rec.AcceptWaveform(data):
                continue  # phrase not finished yet
            text = json.loads(rec.Result()).get("text", "")
            if not text:
                # An empty result would only add a blank line to the console
                # and a stray space to the transcript.
                continue
            print(text)
            recognized_text += text + " "
            if any(keyword in text for keyword in END_KEYWORDS):
                # stop_recognition() clears `recording`, so the check at the
                # top of the loop ends it if the call returns.
                stop_recognition()
    except Exception as e:
        with lock:
            still_recording = recording
        # stop_recognition() may close the stream from another thread while
        # this loop is blocked in read(); that failure is expected, so only
        # report errors that happen while recording is still on.
        if still_recording:
            print(f"录音过程中出现错误: {e}")
def stop_recognition():
    """Stop recording, save the transcript and terminate the program.

    Returns immediately when no recording is in progress. Otherwise it
    clears the ``recording`` flag, appends the recognizer's final result to
    ``recognized_text``, closes the audio stream and PyAudio, writes the
    transcript to ``recognized_text.txt`` (UTF-8, overwriting any previous
    file) and exits the process with status 0.

    Each step is attempted independently: a failure is printed and the
    remaining steps still run.
    """
    global recording, recognized_text
    with lock:
        if not recording:
            return
        recording = False
    print("停止录音")

    # Flush whatever the recognizer is still holding into the transcript.
    try:
        final_result = json.loads(rec.FinalResult())
        final_text = final_result.get("text", "")
        print("Final result:", final_text)
        recognized_text += final_text
    except Exception as e:
        print(f"获取最终结果时出现错误: {e}")

    # Release the audio stream and PyAudio.
    try:
        stream.stop_stream()
        stream.close()
        p.terminate()
    except Exception as e:
        print(f"关闭音频流时出现错误: {e}")

    # Persist the recognized text.
    try:
        with open('recognized_text.txt', 'w', encoding='utf-8') as file:
            file.write(recognized_text)
        print("识别的文本已保存到 recognized_text.txt")
    except Exception as e:
        print(f"保存文件时出现错误: {e}")

    # sys.exit() only raises SystemExit in the calling thread. This function
    # is called from the recording thread and from the keyboard callback, so
    # sys.exit() alone would leave the main thread blocked in keyboard.wait()
    # and the program would never terminate.
    if threading.current_thread() is threading.main_thread():
        sys.exit(0)
    # os._exit() skips interpreter cleanup, so flush the console output first.
    sys.stdout.flush()
    sys.stderr.flush()
    os._exit(0)
def on_space_press(event):
    """Toggle recording when the space bar is pressed.

    Registered as a ``keyboard.on_press`` callback. Events for any other
    key are ignored. If no recording is active, a new thread running
    ``start_recognition`` is started; otherwise ``stop_recognition()`` is
    called.

    Args:
        event: The key event passed by ``keyboard``; only its ``name``
            attribute is read.
    """
    global recording
    if event.name != 'space':
        return

    with lock:
        was_recording = recording
        if not was_recording:
            # Claim the flag before the worker thread starts, so a second
            # press arriving in between cannot start a second recorder.
            recording = True

    # Act only after `lock` is released: stop_recognition() acquires the same
    # non-reentrant lock, so calling it while holding the lock deadlocks.
    if was_recording:
        stop_recognition()
    else:
        threading.Thread(target=start_recognition).start()
# Listen for key presses; on_space_press itself filters for the space bar.
keyboard.on_press(on_space_press)
print("按空格键开始识别,说 '结束' 或 '停止' 来结束录音。")
try:
    # Block the main thread; the work happens in the key callback and in the
    # recording thread.
    keyboard.wait()
except KeyboardInterrupt:
    # Ctrl+C would otherwise end only the main thread: the transcript would
    # not be saved and a running recording thread would keep the process
    # alive. stop_recognition() is a no-op when nothing is being recorded.
    stop_recognition()
四、结果展示
五、相关资源
vosk模型下载链接: https://pan.baidu.com/s/13WAE_kRwd09I5JMbQjtnMw?pwd=bwtj 提取码: bwtj