Java实现调用第三方接口识别发票信息
需求:对每个发票图片文件进行重命名,名称为发票号+固定信息,主要处理增值税发票
这里需要用到第三方的OCR识别接口,我用的是百度云的文字识别服务,所以你需要先注册百度云账号,创建应用并生成密钥,获得接口的调用权限!
百度云官方地址:https://console.bce.baidu.com/
百度云服务注册步骤
-
登录百度云账号
扫码或用手机号码短信登录,然后搜索文字识别服务
-
创建应用,生成密钥
我这里只勾选一个增值税发票识别
-
获取密钥
创建完应用后直接用就行了
代码实现步骤
- 通过client_id和client_secret获取token
- 通过token和相关参数请求识别发票接口
- 返回结果
- 处理文件
package com.shac.sellinvoiceautomation;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import lombok.extern.slf4j.Slf4j;
import okhttp3.*;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.test.context.junit4.SpringRunner;
import org.springframework.test.context.web.WebAppConfiguration;
import org.springframework.util.ObjectUtils;
import java.io.*;
import java.net.URLEncoder;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Base64;
import java.util.Objects;
import java.util.concurrent.TimeUnit;
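/**
* Integration test that calls Baidu Cloud's VAT-invoice OCR API and renames invoice
* image files after the recognized invoice number.
*
* @ClassName OcrIdentificationInvoiceTest
* @Author jeckwu
* @Date 2023/5/19 14:22
*/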
@SpringBootTest
@RunWith(SpringRunner.class)
@WebAppConfiguration
@Slf4j
public class OcrIdentificationInvoiceTest {
// Shared OkHttp client; connect, read and write timeouts are each 15 seconds.
static final OkHttpClient HTTP_CLIENT = new OkHttpClient.Builder()
    .writeTimeout(15, TimeUnit.SECONDS)
    .readTimeout(15, TimeUnit.SECONDS)
    .connectTimeout(15, TimeUnit.SECONDS)
    .build();
/**
 * Requests an OAuth access token from Baidu Cloud using the client-credentials grant.
 *
 * <p>The {@code client_id} and {@code client_secret} placeholders in the URL must be
 * replaced with the credentials of your own Baidu Cloud application before running.
 *
 * @return the non-empty {@code access_token} value of the JSON response
 * @throws IOException if the HTTP call fails, the response carries no body, or the
 *                     response does not contain an {@code access_token}
 */
public String getToken() throws IOException {
    MediaType mediaType = MediaType.parse("application/json");
    RequestBody body = RequestBody.create(mediaType, "");
    Request request = new Request.Builder()
        .url("https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id=你的client_id&client_secret=你的client_secret")
        .method("POST", body)
        .addHeader("Content-Type", "application/json")
        .addHeader("Accept", "application/json")
        .build();
    // try-with-resources: an unclosed Response keeps its connection checked out of the pool.
    try (Response response = HTTP_CLIENT.newCall(request).execute()) {
        // Explicit check instead of `assert`, which is a no-op unless the JVM runs with -ea.
        ResponseBody responseBody = response.body();
        if (responseBody == null) {
            throw new IOException("Token response has no body, HTTP " + response.code());
        }
        // parseObject yields null for an empty payload.
        JSONObject jsonObject = JSON.parseObject(responseBody.string());
        String accessToken = jsonObject == null ? null : jsonObject.getString("access_token");
        if (accessToken == null || accessToken.isEmpty()) {
            // Fail here rather than letting a null token be concatenated into later request URLs.
            String detail = jsonObject == null ? "empty response" : jsonObject.getString("error_description");
            throw new IOException("No access_token in token response, HTTP " + response.code() + ": " + detail);
        }
        return accessToken;
    }
}
/**
 * Sends an image to Baidu's VAT-invoice OCR endpoint and extracts the invoice number.
 *
 * @param token     access token appended to the request URL
 * @param imgBase64 Base64 image data; it is written into the form-urlencoded body as-is,
 *                  so it must already be URL-encoded
 * @return the {@code InvoiceNum} field of {@code words_result}, or {@code null} when the
 *         service reports an error or the response contains no {@code words_result}
 * @throws IOException if the HTTP call fails or the response has no usable body
 */
public String ocrInvoice(String token,String imgBase64) throws IOException{
    MediaType mediaType = MediaType.parse("application/x-www-form-urlencoded");
    RequestBody body = RequestBody.create(mediaType, "image="+imgBase64);
    Request request = new Request.Builder()
        .url("https://aip.baidubce.com/rest/2.0/ocr/v1/vat_invoice?access_token="+token)
        .method("POST", body)
        .addHeader("Content-Type", "application/x-www-form-urlencoded")
        .addHeader("Accept", "application/json")
        .build();
    // try-with-resources: an unclosed Response keeps its connection checked out of the pool.
    try (Response response = HTTP_CLIENT.newCall(request).execute()) {
        // Explicit check instead of `assert`, which is a no-op unless the JVM runs with -ea.
        ResponseBody responseBody = response.body();
        if (responseBody == null) {
            throw new IOException("OCR response has no body, HTTP " + response.code());
        }
        JSONObject jsonObject = JSON.parseObject(responseBody.string());
        if (jsonObject == null) {
            // parseObject yields null for an empty payload.
            throw new IOException("OCR response body is empty, HTTP " + response.code());
        }
        String errorMsg = jsonObject.getString("error_msg");
        if (!ObjectUtils.isEmpty(errorMsg)){
            log.error("识别失败,不是电子发票!msg:{}",errorMsg);
            return null;
        }
        // Guard against a response that has neither error_msg nor words_result.
        JSONObject wordsResult = jsonObject.getJSONObject("words_result");
        if (wordsResult == null) {
            log.error("识别失败,响应中没有words_result");
            return null;
        }
        return wordsResult.getString("InvoiceNum");
    }
}
/**
 * Reads a whole file and returns its bytes as a Base64 string.
 *
 * @param path      location of the file to read
 * @param urlEncode pass {@code true} when the result goes into an
 *                  application/x-www-form-urlencoded body; the Base64 text is then
 *                  additionally URL-encoded as UTF-8
 * @return the Base64 text without any data-URI header
 * @throws IOException if the file cannot be read
 */
static String getFileContentAsBase64(String path, boolean urlEncode) throws IOException {
    final String encoded = Base64.getEncoder().encodeToString(Files.readAllBytes(Paths.get(path)));
    return urlEncode ? URLEncoder.encode(encoded, "utf-8") : encoded;
}
// Image recognized by the single-file test below.
final String invoicePath = "/Users/jeckwu/Desktop/test/IMG_8140.JPG";

/**
 * Recognizes one invoice image and logs either its invoice number or a failure message.
 *
 * @throws IOException if fetching the token, reading the image, or the OCR call fails
 */
@Test
public void test01() throws IOException {
    String accessToken = getToken();
    String encodedImage = getFileContentAsBase64(invoicePath, true);
    String number = ocrInvoice(accessToken, encodedImage);
    if (ObjectUtils.isEmpty(number)) {
        log.info("照片路径:{},识别失败!不是电子发票!",invoicePath);
        return;
    }
    log.info("照片路径:{},发票号码:{}",invoicePath,number);
}
final String OCR_FOLDER = "/Users/jeckwu/Desktop/test/";
final String FILE_NAME_PREFIX = "test-invoice-";

/**
 * Batch recognition: renames every invoice image found directly in {@code OCR_FOLDER}
 * or one directory level below it to {@code FILE_NAME_PREFIX + invoiceNumber + ".jpg"}.
 *
 * @throws IOException if fetching the token, reading an image, or an OCR call fails
 */
@Test
public void test02() throws IOException {
    // List once; listFiles() returns null when the folder is missing or unreadable.
    File[] entries = new File(OCR_FOLDER).listFiles();
    if (entries == null || entries.length == 0) {
        log.info("没有要识别的发票文件!!!");
        return;
    }
    String token = getToken();
    for (File entry : entries) {
        if (entry.isFile()) {
            renameToInvoiceNumber(token, entry);
        } else if (entry.isDirectory()) {
            File[] children = entry.listFiles();
            if (children == null) {
                continue;
            }
            for (File child : children) {
                // Only regular files: reading a nested directory as an image would throw
                // and abort the whole batch.
                if (child.isFile()) {
                    renameToInvoiceNumber(token, child);
                }
            }
        }
    }
}

// Recognizes one image and renames it after its invoice number; skips files already renamed.
private void renameToInvoiceNumber(String token, File invoiceFile) throws IOException {
    // Test the file name, not the absolute path, so a parent folder whose name happens
    // to contain the prefix does not cause every file in it to be skipped.
    if (invoiceFile.getName().startsWith(FILE_NAME_PREFIX)) {
        return;
    }
    String invoiceNum = ocrInvoice(token, getFileContentAsBase64(invoiceFile.getAbsolutePath(), true));
    if (ObjectUtils.isEmpty(invoiceNum)) {
        log.info("照片路径:{},识别失败!不是电子发票!",invoiceFile.getAbsolutePath());
        return;
    }
    // NOTE(review): the target extension is always ".jpg", whatever the source extension was.
    File target = new File(invoiceFile.getParentFile(), FILE_NAME_PREFIX + invoiceNum + ".jpg");
    try {
        // Files.move without REPLACE_EXISTING fails when the target exists, so two images
        // of the same invoice can never overwrite each other; File.renameTo only returned
        // a boolean, which was ignored.
        Files.move(invoiceFile.toPath(), target.toPath());
    } catch (IOException e) {
        log.error("发票路径:{},发票号码:{},重命名失败,目标文件:{}",
            invoiceFile.getAbsolutePath(), invoiceNum, target.getAbsolutePath(), e);
        return;
    }
    log.info("发票路径:{},发票号码:{},识别成功",invoiceFile.getAbsolutePath(),invoiceNum);
}
/**
 * Copies the bytes of {@code source} into {@code dest}, creating or truncating {@code dest}.
 *
 * @param source file to read
 * @param dest   file to write
 * @throws IOException if either file cannot be opened, or reading or writing fails
 */
private static void copyFileUsingStream(File source, File dest) throws IOException {
    // try-with-resources closes both streams on every path and never touches a stream
    // that failed to open, so the original open error is what the caller sees.
    try (InputStream in = new FileInputStream(source);
         OutputStream out = new FileOutputStream(dest)) {
        byte[] buffer = new byte[1024];
        int length;
        // read() signals end of stream with -1.
        while ((length = in.read(buffer)) != -1) {
            out.write(buffer, 0, length);
        }
    }
}
}
其中百度云这个识别服务每个月有免费试用次数且有限!!有需要可以去了解详情。
识别效果
基本能识别出来,前提是发票必须清楚,识别信息也比较全面,具体可参考该接口文档。
百度云官方调试网站:https://console.bce.baidu.com/tools/#/api?product=AI&project=%E6%96%87%E5%AD%97%E8%AF%86%E5%88%AB&parent=%E8%B4%A2%E5%8A%A1%E7%A5%A8%E6%8D%AEOCR&api=rest%2F2.0%2Focr%2Fv1%2Fvat_invoice&method=post
如果文章对你有帮助请点赞支持!!