上篇文章(https://blog.csdn.net/AWNUXCVBN/article/details/138372795)识别出的文字都没有标点符号,虽然可以按整句断行,但总觉得怪怪的……于是查了一下:ncnn 版本没有添加标点符号的功能;翻了一下 onnx 版本,有添加标点的示例,但没有 C# 的,于是照着示例自己封装了一番。
using System.Runtime.InteropServices;
namespace SherpaOnnx
{
    /// <summary>
    /// Managed wrapper around the offline punctuation functions exported by the
    /// <c>sherpa-onnx-c-api</c> native library.
    /// </summary>
    /// <remarks>
    /// Text is exchanged with the native side as UTF-8, independent of the
    /// operating system's ANSI code page, and the native result buffer is released
    /// with <c>SherpaOfflinePunctuationFreeText</c> instead of letting the runtime
    /// free it with the wrong allocator.
    /// </remarks>
    public class OfflinePunctuation : IDisposable
    {
        private const string NativeLibrary = "sherpa-onnx-c-api";

        // Native punctuation object; Handle is IntPtr.Zero once released.
        private HandleRef _handle;

        /// <summary>Creates the native punctuation object from <paramref name="config"/>.</summary>
        /// <param name="config">Model configuration handed to the native create function.</param>
        /// <exception cref="InvalidOperationException">The native create function returned a null handle.</exception>
        public OfflinePunctuation(SherpaOnnxOfflinePunctuationConfig config)
        {
            // The C API expects a pointer to the config, hence `ref`.
            IntPtr native = SherpaOnnxCreateOfflinePunctuation(ref config);
            if (native == IntPtr.Zero)
            {
                throw new InvalidOperationException(
                    "SherpaOnnxCreateOfflinePunctuation returned a null handle; check the model configuration.");
            }

            _handle = new HandleRef(this, native);
        }

        /// <summary>Runs the punctuation model on <paramref name="text"/>.</summary>
        /// <param name="text">Unpunctuated input text.</param>
        /// <returns>The text returned by the native library, or an empty string if it returned null.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
        /// <exception cref="ObjectDisposedException">The instance has already been disposed.</exception>
        public string AddPunctuation(string text)
        {
            if (text == null)
            {
                throw new ArgumentNullException(nameof(text));
            }

            IntPtr native = _handle.Handle;
            if (native == IntPtr.Zero)
            {
                throw new ObjectDisposedException(nameof(OfflinePunctuation));
            }

            IntPtr result = IntPtr.Zero;
            try
            {
                result = SherpaOfflinePunctuationAddPunct(native, text);
                if (result == IntPtr.Zero)
                {
                    return string.Empty;
                }

                // Decode explicitly as UTF-8 so the result does not depend on the system code page.
                return Marshal.PtrToStringUTF8(result) ?? string.Empty;
            }
            finally
            {
                if (result != IntPtr.Zero)
                {
                    // The buffer is owned by the native library and must be returned to it.
                    SherpaOfflinePunctuationFreeText(result);
                }

                // Keep the finalizer from destroying the handle while the native call is running.
                GC.KeepAlive(this);
            }
        }

        /// <summary>Releases the native punctuation object. Safe to call more than once.</summary>
        public void Dispose()
        {
            Cleanup();
            GC.SuppressFinalize(this);
        }

        ~OfflinePunctuation()
        {
            Cleanup();
        }

        // Destroys the native object exactly once and clears the stored handle.
        private void Cleanup()
        {
            IntPtr native = _handle.Handle;
            if (native == IntPtr.Zero)
            {
                return;
            }

            _handle = new HandleRef(this, IntPtr.Zero);
            DestroyOfflinePunctuation(native);
        }

        [DllImport(NativeLibrary, EntryPoint = "SherpaOnnxDestroyOfflinePunctuation")]
        private static extern void DestroyOfflinePunctuation(IntPtr handle);

        [DllImport(NativeLibrary, EntryPoint = "SherpaOnnxCreateOfflinePunctuation")]
        private static extern IntPtr SherpaOnnxCreateOfflinePunctuation(ref SherpaOnnxOfflinePunctuationConfig config);

        // Returns a native char* that must be released with SherpaOfflinePunctuationFreeText.
        [DllImport(NativeLibrary, EntryPoint = "SherpaOfflinePunctuationAddPunct")]
        private static extern IntPtr SherpaOfflinePunctuationAddPunct(
            IntPtr ptr,
            [MarshalAs(UnmanagedType.LPUTF8Str)] string text);

        [DllImport(NativeLibrary, EntryPoint = "SherpaOfflinePunctuationFreeText")]
        private static extern void SherpaOfflinePunctuationFreeText(IntPtr text);
    }
}
namespace SherpaOnnx
{
    /// <summary>
    /// Top-level configuration accepted by the <c>OfflinePunctuation</c> constructor.
    /// </summary>
    // Sequential is already the default layout for C# structs; it is spelled out
    // here because the struct is handed to native code.
    [System.Runtime.InteropServices.StructLayout(System.Runtime.InteropServices.LayoutKind.Sequential)]
    public struct SherpaOnnxOfflinePunctuationConfig
    {
        /// <summary>Settings of the punctuation model to use.</summary>
        public SherpaOnnxOfflinePunctuationModelConfig model;
    }
}
namespace SherpaOnnx
{
    /// <summary>
    /// Settings for the punctuation model: model file, thread count, debug flag
    /// and execution provider.
    /// </summary>
    // Sequential is already the default layout for C# structs; it is spelled out
    // here because the struct is handed to native code.
    [System.Runtime.InteropServices.StructLayout(System.Runtime.InteropServices.LayoutKind.Sequential)]
    public struct SherpaOnnxOfflinePunctuationModelConfig
    {
        // NOTE(review): the initializers below only run inside the declared
        // constructor, which then overwrites every field; default(T) leaves the
        // fields at null/0/false.

        /// <summary>Path of the CT-Transformer model file (presumably an .onnx file; see the usage sample).</summary>
        public string ctTransformer = "";

        /// <summary>Thread count passed to the native library.</summary>
        public int numThreads = 1;

        /// <summary>Debug flag passed to the native library.</summary>
        public bool debug = true;

        /// <summary>Execution provider name, e.g. "cpu".</summary>
        public string provider = "cpu";

        /// <summary>Initializes every field from the matching argument.</summary>
        public SherpaOnnxOfflinePunctuationModelConfig(string ctTransformer, int numThreads, bool debug, string provider)
        {
            (this.ctTransformer, this.numThreads, this.debug, this.provider) =
                (ctTransformer, numThreads, debug, provider);
        }
    }
}
// Usage sample: describe the model, wrap it in the top-level config, create the
// punctuation object and punctuate one English sentence.
SherpaOnnx.SherpaOnnxOfflinePunctuationModelConfig soopmc =
    new SherpaOnnx.SherpaOnnxOfflinePunctuationModelConfig(
        Environment.CurrentDirectory + "/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx",
        1,
        false,
        "cpu");
SherpaOnnx.SherpaOnnxOfflinePunctuationConfig soopc =
    new SherpaOnnx.SherpaOnnxOfflinePunctuationConfig { model = soopmc };

// NOTE(review): offlinePunctuation is not declared in this snippet; presumably it
// is a field declared elsewhere in the host program. It should be disposed when
// no longer needed.
offlinePunctuation = new SherpaOnnx.OfflinePunctuation(soopc);

// Fixed: the method is Console.WriteLine (capital W); Console.writeLine does not compile.
Console.WriteLine(offlinePunctuation.AddPunctuation("When you only have one life that's what makes it special"));
看起来一切顺利,运行一下,不出意外地出了意外:输出结果不正常。
微信群里 @fangjun 大佬指点了一番,说是需要把系统设置为使用 UTF-8 编码。
设置重启之后,正常了!
另外,大佬说,这个加标点模型,支持中英文。默认所有的标点,都是中文标点,即使你输入的都是英文。
最后,案发现场在https://github.com/xue-fei/uSherpaServer.git