在上一篇文章《基于百度飞桨PaddleOCR的图片文字识别》的基础上,做了个简单的扩展:
1、通过PyQt5做个简单的UI界面;
2、通过OpenCV操作笔记本摄像头进行视频显示、拍照等功能;
3、加载图片;
4、对拍照图片或者加载的图片,进行PaddleOCR的图片文字识别;
一、UI界面设计
1、PyCharm工程中,在ocrtest文件夹下新建ocr_camera.ui文件,然后右键,找到"External Tools",选择QT Designer,进入QT5的UI设计界面
2、添加相应的控件、设置好参数。如显示视频及图片的QLabel,默认情况下是透明的,先设置背景色,便于在设计界面中调整其大小。
QLabel设置背景色:2.1、选中autoFillBackground
2.2、改变Window调色板
最终的UI界面如下:
3、保存ui文件。
如果先打开QT Designer,设计后,保存ocr_camera.ui到ocrtest文件夹下,效果是一样的。
4、回到PyCharm工程,选中ocr_camera.ui文件,然后右键,找到"External Tools",选择PyUIC,将UI设计文件转换为ocr_camera.py文件。
ocr_camera.py文件如下:
# -*- coding: utf-8 -*-
# Form implementation generated from reading ui file 'ocr_camera.ui'
#
# Created by: PyQt5 UI code generator 5.15.4
#
# WARNING: Any manual changes made to this file will be lost when pyuic5 is
# run again. Do not edit this file unless you know what you are doing.
from PyQt5 import QtCore, QtGui, QtWidgets
class Ui_ocr_camera(object):
    """Widget layout of the OCR camera form.

    Holds five push buttons, one text box for the recognition output and two
    labels with a filled background (camera preview and still image).
    """

    def setupUi(self, ocr_camera):
        """Create every child widget on ``ocr_camera`` and expose it on ``self``.

        Widgets are created in the same order as in the .ui file.
        """

        def solid_brush(rgb):
            # Solid-pattern brush for one (r, g, b) colour.
            brush = QtGui.QBrush(QtGui.QColor(*rgb))
            brush.setStyle(QtCore.Qt.SolidPattern)
            return brush

        def label_palette(window_rgb):
            # Palette whose Window role is ``window_rgb`` in every colour
            # group; the Base role is white except in the Disabled group,
            # where it is ``window_rgb`` as well.
            white = (255, 255, 255)
            base_by_group = (
                (QtGui.QPalette.Active, white),
                (QtGui.QPalette.Inactive, white),
                (QtGui.QPalette.Disabled, window_rgb),
            )
            palette = QtGui.QPalette()
            for group, base_rgb in base_by_group:
                palette.setBrush(group, QtGui.QPalette.Base, solid_brush(base_rgb))
                palette.setBrush(group, QtGui.QPalette.Window, solid_brush(window_rgb))
            return palette

        def push_button(object_name, x, y, width, height):
            # Button parented to the form with a fixed geometry and object name.
            button = QtWidgets.QPushButton(ocr_camera)
            button.setGeometry(QtCore.QRect(x, y, width, height))
            button.setObjectName(object_name)
            return button

        ocr_camera.setObjectName("ocr_camera")
        ocr_camera.resize(1278, 750)

        # Button row along the top of the form.
        self.imageocrButton = push_button("imageocrButton", 970, 20, 81, 23)
        self.takephotosButton = push_button("takephotosButton", 670, 20, 75, 23)
        self.closeCameraButton = push_button("closeCameraButton", 770, 20, 75, 23)
        self.openCameraButton = push_button("openCameraButton", 570, 20, 75, 23)
        self.loadImageButton = push_button("loadImageButton", 870, 20, 75, 23)

        # Text box on the right.
        text_box = QtWidgets.QTextEdit(ocr_camera)
        text_box.setGeometry(QtCore.QRect(450, 70, 791, 601))
        text_box.setObjectName("showtext")
        self.showtext = text_box

        # Upper label with a cyan background.
        camera_label = QtWidgets.QLabel(ocr_camera)
        camera_label.setGeometry(QtCore.QRect(10, 30, 401, 281))
        camera_label.setPalette(label_palette((0, 255, 255)))
        camera_label.setTabletTracking(False)
        camera_label.setAcceptDrops(False)
        camera_label.setAutoFillBackground(True)
        camera_label.setText("")
        camera_label.setOpenExternalLinks(False)
        camera_label.setObjectName("cameralabel")
        self.cameralabel = camera_label

        # Lower label with an orange background.
        image_label = QtWidgets.QLabel(ocr_camera)
        image_label.setGeometry(QtCore.QRect(10, 380, 401, 281))
        image_label.setPalette(label_palette((255, 170, 127)))
        image_label.setAutoFillBackground(True)
        image_label.setText("")
        image_label.setObjectName("imagelabel")
        self.imagelabel = image_label

        self.retranslateUi(ocr_camera)
        QtCore.QMetaObject.connectSlotsByName(ocr_camera)

    def retranslateUi(self, ocr_camera):
        """Set the window title and the caption of every button."""
        translate = QtCore.QCoreApplication.translate
        ocr_camera.setWindowTitle(translate("ocr_camera", "Form"))
        captions = (
            (self.imageocrButton, "图片文字识别"),
            (self.takephotosButton, "拍照"),
            (self.closeCameraButton, "关闭摄像头"),
            (self.openCameraButton, "打开摄像头"),
            (self.loadImageButton, "导入图片"),
        )
        for button, caption in captions:
            button.setText(translate("ocr_camera", caption))
到此,界面设计完成。
二、功能实现
1、ocrtest文件夹下新建main.py文件。所有功能在此文件实现。
注意:
1、main.py里添加import paddleocr;
2、paddleocr.py注释掉“if __name__ == '__main__':”这段测试代码,main.py中已有相应实现。
话不多说,直接上代码,main.py:
# -*- coding: utf-8 -*-
import os
import sys
from PyQt5 import QtGui
from PyQt5.Qt import *
import cv2
from ocr_camera import Ui_ocr_camera # 增加文本显示器
import paddleocr
from ppocr.utils.logging import get_logger
logger = get_logger()
from ppocr.utils.utility import get_image_file_list
class ocrTestUI(Ui_ocr_camera, QWidget):
    """Main window: camera preview, snapshot, image loading and PaddleOCR.

    The upper label shows the live camera stream, the lower label shows the
    snapshot or loaded picture, and the text box shows the recognition output.
    """

    def __init__(self):
        """Build the UI, create the refresh timer and wire the buttons."""
        super().__init__()
        self.setupUi(self)

        self.showImage = None      # last camera frame as a QImage (None until one is read)
        self.ocrImagePath = ''     # path of the picture the OCR button will process
        self._ocr_engine = None    # PaddleOCR engine, created on first use

        self.timer = QTimer()
        self.timer.timeout.connect(self.show_stream)  # refresh the preview on every tick
        self.cap = cv2.VideoCapture()                 # unopened capture object
        self.cameralabel.setScaledContents(True)      # scale the video to the label
        self.imagelabel.setScaledContents(True)       # scale the picture to the label

        self.openCameraButton.clicked.connect(self.openCamera)
        self.takephotosButton.clicked.connect(self.takePhotos)
        self.closeCameraButton.clicked.connect(self.closeCamera)
        self.loadImageButton.clicked.connect(self.loadImage)
        self.imageocrButton.clicked.connect(self.imageRecognition)

    @staticmethod
    def _frame_to_qimage(frame):
        """Convert a BGR OpenCV frame into a 1280x720 RGB888 QImage.

        The returned QImage is a deep copy: a QImage built on an external
        buffer does not own it, and the numpy array goes out of scope as soon
        as this function returns.
        """
        rgb = cv2.cvtColor(cv2.resize(frame, (1280, 720)), cv2.COLOR_BGR2RGB)
        height, width = rgb.shape[:2]  # numpy image shape is (rows, cols, channels)
        view = QImage(rgb.data, width, height, rgb.strides[0], QImage.Format_RGB888)
        return view.copy()

    def _grab_frame(self):
        """Read one frame from ``self.cap``; return it, or None if the read failed."""
        ok, self.image = self.cap.read()
        if not ok or self.image is None:
            return None
        return self.image

    def _set_ocr_image(self, path):
        """Remember ``path`` as the picture to be recognised."""
        global ocrimage
        self.ocrImagePath = path
        # Module-level name kept in sync for code that still reads it.
        ocrimage = path

    def show_stream(self):
        """Timer slot: read one frame and show it in the camera label."""
        frame = self._grab_frame()
        if frame is None:
            return  # nothing to display on this tick
        self.showImage = self._frame_to_qimage(frame)
        self.cameralabel.setPixmap(QPixmap.fromImage(self.showImage))

    def openCamera(self):
        """Open camera 0 (if not already open) and start the preview timer."""
        if not self.cap.isOpened():
            self.cap = cv2.VideoCapture(0)
        if not self.cap.isOpened():
            self.showtext.append("open camera failed")
            return
        self.timer.start(40)  # one refresh every 40 ms, i.e. 25 frames per second

    def takePhotos(self):
        """Show a snapshot in the image label and save it to ./img/temp.jpg.

        With the camera open the last previewed frame is used. Otherwise the
        camera is opened for a single frame and released again.
        """
        if self.cap.isOpened():
            if self.showImage is None:
                # The preview timer has not delivered a frame yet.
                frame = self._grab_frame()
                if frame is None:
                    self.showtext.append("read camera frame failed")
                    return
                self.showImage = self._frame_to_qimage(frame)
        else:
            self.cap = cv2.VideoCapture(0)
            try:
                frame = self._grab_frame()
            finally:
                self.cap.release()  # always hand the camera back
            if frame is None:
                self.showtext.append("read camera frame failed")
                return
            self.showImage = self._frame_to_qimage(frame)

        self.imagelabel.setPixmap(QPixmap.fromImage(self.showImage))
        os.makedirs('img', exist_ok=True)  # QImage.save() does not create directories
        if not self.showImage.save('./img/temp.jpg'):
            self.showtext.append("save photo failed")
            return
        self._set_ocr_image('img/temp.jpg')

    def closeCamera(self):
        """Stop the preview timer, release the camera and clear the camera label."""
        self.timer.stop()
        self.cap.release()
        self.cameralabel.clear()

    def loadImage(self):
        """Let the user pick a picture, show it and select it for recognition."""
        # Patterns inside the filter must be separated by spaces.
        fname, _ = QFileDialog.getOpenFileName(
            self, '选择图片', './', 'Image files(*.jpg *.gif *.png *.bmp)')
        if not fname:
            return  # dialog cancelled: keep the current selection
        self._set_ocr_image(fname)
        self.imagelabel.setPixmap(QPixmap(fname))
        self.showtext.append("loadImage {}".format(fname))

    def imageRecognition(self):
        """Run OCR on the picture selected by takePhotos() or loadImage()."""
        if not self.ocrImagePath:
            self.showtext.append("no image selected")
            return
        self.showtext.append("loadImage {}".format(self.ocrImagePath))
        self.OCRmain(self.ocrImagePath)

    def OCRmain(self, image_dir):
        """Recognise the text in ``image_dir`` and list it in the text box.

        ``image_dir`` is handed to ``get_image_file_list``. Every recognised
        line is logged and appended to the text box. A rendering of the first
        result page is written to img/result.jpg.
        """
        from PIL import Image  # only needed for the result rendering

        if not image_dir or not os.path.exists(image_dir):
            self.showtext.append("no images find")
            return
        self.showtext.append(image_dir)

        image_file_list = get_image_file_list(image_dir)
        if not image_file_list:
            self.showtext.append("no images find")
            return

        # Build the engine once and reuse it on later clicks.
        if self._ocr_engine is None:
            self._ocr_engine = paddleocr.PaddleOCR()
        engine = self._ocr_engine

        for img_path in image_file_list:
            logger.info('{}{}{}'.format('*' * 10, img_path, '*' * 10))
            result = engine.ocr(img_path,
                                det=True,   # text detection
                                rec=True,   # text recognition
                                cls=True)   # direction classifier for text rotated by 180 degrees
            if not result:
                print("result is none")
                continue

            for page in result:
                for line in page or []:  # skip pages that hold no lines
                    logger.info(line)
                    self.showtext.append("{}".format(line))

            # Draw boxes, texts and scores of the first page onto the picture.
            first_page = result[0]
            if not first_page:
                continue
            image = Image.open(img_path).convert('RGB')
            boxes = [line[0] for line in first_page]
            txts = [line[1][0] for line in first_page]
            scores = [line[1][1] for line in first_page]
            im_show = paddleocr.draw_ocr(image, boxes, txts, scores,
                                         font_path='./fonts/simfang.ttf')
            os.makedirs('img', exist_ok=True)
            Image.fromarray(im_show).save('img/result.jpg')
if __name__ == '__main__':
    # Create the Qt application, show the OCR window and hand the event
    # loop's return code back to the shell.
    application = QApplication(sys.argv)
    main_window = ocrTestUI()
    main_window.show()
    exit_code = application.exec()
    sys.exit(exit_code)
三、测试
1、打开摄像头,拍照、关闭摄像头如下:
打开摄像头,上面的label会显示视频流。拍照后,下面的label显示图片。
也可以不用打开摄像头,直接拍照。
2、拍照图片文字识别,结果如下:
3、加载图片并识别,结果如下: