身份证识别JAVA+OPENCV+OCR

news2024/10/25 0:44:02

一、相关的地址

https://github.com/tesseract-ocr/tessdata

Releases - OpenCV：https://opencv.org/releases/

opencv要装好，我装的是4.5.3的，最新版的没试过。

tessdata只下载了需要用到的语言文件（这里是简体中文的chi_sim.traineddata）。好像还有best和fast的版本，我试了一下报错，不知道是不是版本不支持的问题。

二、主要的思路

识别的代码倒没什么特别的，就是在每一行的识别上，为了提高准确度，稍微花了点心思，但也不是很完善。

发现识别的时候会出现很多干扰，所以从“姓”这一行开始算第一行，然后一行一行分析。

如果图片不清晰，识别结果就不太准，有待改进。

三、代码

pom.xml的依赖：

        <!-- OpenCV -->
        <dependency>
            <groupId>org.openpnp</groupId>
            <artifactId>opencv</artifactId>
            <version>4.5.3-4</version>
        </dependency>

        <!--  Tesseract -->
        <dependency>
            <groupId>net.sourceforge.tess4j</groupId>
            <artifactId>tess4j</artifactId>
            <version>5.11.0</version> <!-- 使用最新版本 -->
        </dependency>

JAVA代码：

package com.yutiandada.idcardread.demos.test;

import org.opencv.core.Core;
import org.opencv.core.Mat;
import org.opencv.core.Size;
import org.opencv.imgcodecs.Imgcodecs;
import org.opencv.imgproc.Imgproc;

import java.io.File;
import java.io.IOException;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import net.sourceforge.tess4j.*;


/**
 * Demo that reads the text on the front of a Chinese resident ID card image.
 *
 * <p>The image is sharpened with OpenCV, passed to Tesseract ({@code chi_sim}), and the
 * recognized lines are then interpreted positionally, starting at the line that carries the
 * name label: name, gender/ethnicity, birth date, address, ID number.
 */
public class IDCardRecognition2 {
    /** Directory of the OpenCV JNI library, used only when it is not on {@code java.library.path}. */
    private static final String OPENCV_NATIVE_DIR = "D:\\opencv\\opencv\\build\\java\\x64";

    /** Directory containing the Tesseract {@code *.traineddata} files. */
    private static final String TESSDATA_PATH = "d:/tessdata";

    /** Image used when no path is given on the command line. */
    private static final String DEFAULT_IMAGE_PATH = "D:/images/test01.png";

    /** Index (relative to the name line) of the first address line. */
    private static final int ADDRESS_LINE = 3;

    /** 17 digits followed by a digit or the check character X. */
    private static final Pattern ID_NUMBER = Pattern.compile("\\d{17}[0-9Xx]");

    /** Year, month and day separated by non-digits, e.g. "1990 年 1 月 5 日". */
    private static final Pattern BIRTH_DATE = Pattern.compile("(\\d{4})\\D+(\\d{1,2})\\D+(\\d{1,2})");

    /** Everything up to and including a (possibly partially recognized) name label. */
    private static final Pattern NAME_LABEL = Pattern.compile("^.*?(?:姓\\s*名|姓|名)");

    static {
        loadOpenCv();
    }

    /**
     * Loads the OpenCV native library.
     *
     * <p>The library is first looked up on {@code java.library.path}; if that fails it is loaded
     * by absolute path from {@link #OPENCV_NATIVE_DIR}. This replaces resetting the private
     * {@code ClassLoader.sys_paths} field through reflection, which newer JDKs do not allow.
     */
    private static void loadOpenCv() {
        try {
            System.loadLibrary(Core.NATIVE_LIBRARY_NAME);
        } catch (UnsatisfiedLinkError notOnLibraryPath) {
            File nativeLibrary =
                    new File(OPENCV_NATIVE_DIR, System.mapLibraryName(Core.NATIVE_LIBRARY_NAME));
            System.load(nativeLibrary.getAbsolutePath());
        }
    }

    /**
     * Reads an image and sharpens it with an unsharp mask.
     *
     * @param imagePath path of the image file to read
     * @return the sharpened image ({@code 1.5 * image - 0.5 * blurred})
     * @throws IllegalArgumentException if the image cannot be read
     */
    public static Mat preprocessImage(String imagePath) {
        Mat image = Imgcodecs.imread(imagePath);
        // imread reports failure by returning an empty Mat instead of throwing.
        if (image.empty()) {
            throw new IllegalArgumentException("Cannot read image: " + imagePath);
        }

        Mat blurred = new Mat();
        Mat sharpened = new Mat();
        try {
            // A zero kernel size lets OpenCV derive it from sigma (3).
            Imgproc.GaussianBlur(image, blurred, new Size(0, 0), 3);
            Core.addWeighted(image, 1.5, blurred, -0.5, 0, sharpened);
        } finally {
            blurred.release();
            image.release();
        }
        return sharpened;
    }

    /**
     * Runs Tesseract OCR (simplified Chinese) on the given image.
     *
     * <p>The image is written to a temporary PNG file for Tesseract; the file is removed again
     * whether or not recognition succeeds.
     *
     * @param mat image to recognize
     * @return the recognized lines, trimmed, with blank lines removed
     * @throws TesseractException if the temporary file cannot be created or written, or if OCR fails
     */
    public static List<String> recognizeText(Mat mat) throws TesseractException {
        File tempFile;
        try {
            tempFile = File.createTempFile("temp", ".png");
        } catch (IOException e) {
            throw new TesseractException("Error during OCR processing", e);
        }

        try {
            if (!Imgcodecs.imwrite(tempFile.getAbsolutePath(), mat)) {
                throw new TesseractException("Could not write image to " + tempFile);
            }

            Tesseract tesseract = new Tesseract();
            tesseract.setLanguage("chi_sim");
            tesseract.setDatapath(TESSDATA_PATH);

            String result = tesseract.doOCR(tempFile);

            List<String> lines = new ArrayList<>();
            for (String line : result.split("\\r?\\n")) {
                String trimmed = line.trim();
                if (!trimmed.isEmpty()) {
                    lines.add(trimmed);
                }
            }
            return lines;
        } finally {
            // Clean up on every path; fall back to deletion at JVM exit if the file is still locked.
            if (!tempFile.delete()) {
                tempFile.deleteOnExit();
            }
        }
    }

    /**
     * Recognizes an ID card image and prints the extracted fields.
     *
     * @param args optional; {@code args[0]} is the image path (defaults to
     *             {@code D:/images/test01.png})
     */
    public static void main(String[] args) {
        try {
            String imagePath = args.length > 0 ? args[0] : DEFAULT_IMAGE_PATH;
            Mat processedImage = preprocessImage(imagePath);
            List<String> recognizedLines;
            try {
                recognizedLines = recognizeText(processedImage);
            } finally {
                processedImage.release();
            }

            System.out.println("Recognized Text:");
            printCardFields(recognizedLines);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Interprets the OCR lines positionally, starting at the name line, and prints each field. */
    private static void printCardFields(List<String> recognizedLines) {
        int startIndex = findNameLineIndex(recognizedLines);
        if (startIndex == -1) {
            System.out.println("No line containing '姓' found.");
            return;
        }

        // Lines before the name line are treated as noise; renumber from the name line.
        List<String> cardLines = recognizedLines.subList(startIndex, recognizedLines.size());

        System.out.println("Name: " + extractName(lineAt(cardLines, 0)));

        String genderLine = lineAt(cardLines, 1);
        System.out.println("Gender: " + extractGender(genderLine));
        System.out.println("Ethnicity: " + extractEthnicity(genderLine));

        printBirthDate(lineAt(cardLines, 2));

        // The address may wrap onto one, two or more lines, so locate the ID number line by its
        // shape. If none is recognizable, assume a two-line address followed by the ID number.
        int idLineIndex = findIdNumberLine(cardLines);
        int addressEnd = idLineIndex != -1 ? idLineIndex : ADDRESS_LINE + 2;
        System.out.println("Address: " + extractAddress(cardLines, addressEnd));
        System.out.println("ID Number: " + extractIdNumber(lineAt(cardLines, addressEnd)));
    }

    /** Returns the index of the first line containing "姓" or "名", or -1 if there is none. */
    private static int findNameLineIndex(List<String> lines) {
        for (int i = 0; i < lines.size(); i++) {
            String line = lines.get(i);
            if (line.contains("姓") || line.contains("名")) {
                return i;
            }
        }
        return -1;
    }

    /** Returns the line at {@code index}, or an empty string when OCR produced fewer lines. */
    private static String lineAt(List<String> lines, int index) {
        return index < lines.size() ? lines.get(index) : "";
    }

    /** Returns the text after the name label; tolerates a spaced or partially recognized label. */
    private static String extractName(String line) {
        int label = line.lastIndexOf("姓名");
        if (label >= 0) {
            return line.substring(label + 2).trim();
        }
        return NAME_LABEL.matcher(line).replaceFirst("").trim();
    }

    /** Returns "男" or "女" if the line contains it, otherwise an empty string. */
    private static String extractGender(String line) {
        if (line.contains("男")) {
            return "男";
        }
        if (line.contains("女")) {
            return "女";
        }
        return "";
    }

    /** Returns the text after the two-character "民族" label, or an empty string if "民" is absent. */
    private static String extractEthnicity(String line) {
        String compact = line.replaceAll("\\s+", "");
        int label = compact.indexOf("民");
        if (label < 0) {
            return "";
        }
        int valueStart = label + 2;
        return valueStart < compact.length() ? compact.substring(valueStart) : "";
    }

    /**
     * Prints year, month and day of birth. Accepts either exactly eight digits on the line or a
     * date whose parts are separated by non-digits (months and days need not be zero-padded).
     */
    private static void printBirthDate(String line) {
        String year;
        String month;
        String day;

        String digits = line.replaceAll("[^0-9]", "");
        if (digits.length() == 8) {
            year = digits.substring(0, 4);
            month = digits.substring(4, 6);
            day = digits.substring(6, 8);
        } else {
            Matcher date = BIRTH_DATE.matcher(line);
            if (!date.find()) {
                System.out.println("Invalid date format in the third line.");
                return;
            }
            year = date.group(1);
            month = padToTwoDigits(date.group(2));
            day = padToTwoDigits(date.group(3));
        }

        System.out.println("Birth Year: " + year);
        System.out.println("Birth Month: " + month);
        System.out.println("Birth Day: " + day);
    }

    /** Left-pads a one-digit value with a zero so both date branches print the same format. */
    private static String padToTwoDigits(String value) {
        return value.length() == 1 ? "0" + value : value;
    }

    /** Returns the index of the first line at or after the address that holds an ID number, or -1. */
    private static int findIdNumberLine(List<String> cardLines) {
        for (int i = ADDRESS_LINE; i < cardLines.size(); i++) {
            if (ID_NUMBER.matcher(cardLines.get(i).replaceAll("\\s+", "")).find()) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Joins the address lines {@code [ADDRESS_LINE, endExclusive)} with all whitespace removed.
     * The first two characters of the first line (the "住址" label) are dropped.
     */
    private static String extractAddress(List<String> cardLines, int endExclusive) {
        StringBuilder address = new StringBuilder();
        int end = Math.min(endExclusive, cardLines.size());
        for (int i = ADDRESS_LINE; i < end; i++) {
            String compact = cardLines.get(i).replaceAll("\\s+", "");
            if (i == ADDRESS_LINE) {
                compact = compact.substring(Math.min(2, compact.length()));
            }
            address.append(compact);
        }
        return address.toString();
    }

    /** Returns the 18-character ID number on the line, or else all its letters and digits. */
    private static String extractIdNumber(String line) {
        Matcher id = ID_NUMBER.matcher(line.replaceAll("\\s+", ""));
        if (id.find()) {
            return id.group();
        }
        return line.replaceAll("[^a-zA-Z0-9]", "");
    }
}

IDEA里面要配置一下，让JVM能找到OpenCV的本地库（例如在运行配置的VM options里加上 -Djava.library.path=D:\opencv\opencv\build\java\x64）。