环境准备:https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.7/deploy/hubserving#24-%E5%90%AF%E5%8A%A8%E6%9C%8D%E5%8A%A1
服务器启动命令
hub serving start -c deploy/hubserving/ocr_system/config.json
客户端请求
python tools/test_hubserving.py --server_url=server_url --image_dir=image_path
import base64
import sys
import time
import requests
import json
import asyncio
import aiohttp
import pandas as pd
from sqlalchemy import create_engine, text
import time
from PIL import Image
from io import BytesIO
import cv2
import numpy as np
import io
# Start of the wall-clock timer; the elapsed time is printed at the end of the script.
time1 = time.time()
def read_data(page=0, page_size=100):
    """Fetch one page of base64-encoded images from the ``xj_zsjh_png`` table.

    Args:
        page: Zero-based page index. Defaults to 0 (the first page).
        page_size: Number of rows per page. Defaults to 100.

    Returns:
        list: The ``bas64_str`` value of every fetched row, in query order.
    """
    # Both values end up inside the SQL text, so force them to int first.
    limit = int(page_size)
    # The offset advances by a whole page; the previous ``page * 10`` would
    # have made consecutive pages overlap for any page other than 0.
    offset = int(page) * limit
    # Author's benchmark note: 1000 rows took about 100 s.
    sql_select = f"SELECT * from xj_zsjh_png LIMIT {offset}, {limit}"

    engine = create_engine('mysql+pymysql://xxx:xxx@xxx.xxx.xx.xx:3306/x?charset=uxxxtf8')
    try:
        # The context manager returns the connection to the pool even if the
        # query raises; dispose() then closes the pool itself.
        with engine.connect() as connection:
            results = pd.read_sql(sql=text(sql_select), con=connection)
    finally:
        engine.dispose()

    # Round-trip through JSON so values come back as plain Python objects.
    out = json.loads(results.to_json(orient='records'))
    return [row['bas64_str'] for row in out]
def cv2_to_base64(image):
    """Return the base64 encoding of the bytes-like ``image`` as a str."""
    encoded = base64.b64encode(image)
    return encoded.decode('utf8')
def save_data(results, file_name):
    """Write the recognised text of every OCR result to a CSV file.

    Each entry of ``results`` becomes one CSV row holding the ``'text'`` value
    of each item in that entry. An entry that is ``None`` (a request that
    produced no result) is written as an empty row, so row positions still
    line up with the order of ``results``.

    Args:
        results: Iterable of OCR results; each is a list of dicts with a
            ``'text'`` key, or ``None``.
        file_name: Destination path of the CSV file.
    """
    rows = [[item['text'] for item in (entry or [])] for entry in results]
    pd.DataFrame(rows).to_csv(file_name, index=False)
def process_image_(img_str, left=535, width=240):
    """Decode a base64 image, flatten it onto white, crop a strip, encode as JPEG.

    Args:
        img_str: Base64-encoded contents of an image file.
        left: X coordinate (pixels) of the left edge of the cropped strip.
        width: Width (pixels) of the cropped strip.

    Returns:
        io.BytesIO: Buffer containing the JPEG-encoded strip. The strip spans
        the full image height. Read the data with ``getvalue()``.
    """
    binary = base64.b64decode(img_str)
    image = Image.open(BytesIO(binary))

    # paste() needs a mask with an alpha (or greyscale) band. Converting to
    # RGBA first lets images without transparency (RGB, palette) pass through
    # instead of raising.
    rgba_image = image.convert('RGBA')

    # Composite onto an opaque white background so transparent areas turn
    # white rather than black once the alpha channel is dropped for JPEG.
    rgb_image = Image.new('RGB', rgba_image.size, (255, 255, 255))
    rgb_image.paste(rgba_image, (0, 0), mask=rgba_image)

    # Keep a full-height vertical strip starting at ``left``.
    top = 0
    right = left + width
    bottom = rgba_image.size[1]
    cropped_image = rgb_image.crop((left, top, right, bottom))

    # Encode in memory; callers read the bytes via getvalue().
    image_bytes = io.BytesIO()
    cropped_image.save(image_bytes, format='JPEG')
    return image_bytes
def main2():
    """Synchronous baseline: OCR every image one after another, save to normal.csv.

    Reads the images with ``read_data``, posts each cropped image to the OCR
    endpoint, collects the first entry of ``"results"`` from every HTTP 200
    response, and writes the collected results with ``save_data``. Responses
    with any other status are reported on stdout and skipped.
    """
    url = "http://192.168.0.189:8868/predict/ocr_system"
    headers = {'Content-Type': 'application/json'}
    results = []

    # One Session reuses the underlying connection across requests instead of
    # opening a new one for every image.
    with requests.Session() as session:
        for img_str in read_data():
            image_bytes = process_image_(img_str)
            data = {'images': [cv2_to_base64(image_bytes.getvalue())]}
            response = session.post(url, data=json.dumps(data), headers=headers)
            if response.status_code == 200:
                results.append(response.json()["results"][0])
            else:
                print('Error:', response.status_code)

    save_data(results, 'normal.csv')
async def process_image(img_str, session=None):
    """Crop one base64 image and send it to the OCR endpoint.

    Args:
        img_str: Base64-encoded contents of an image file.
        session: Optional ``aiohttp.ClientSession`` to send the request on.
            When omitted, a temporary session is opened for this one request.

    Returns:
        The first entry of the response's ``"results"`` list on HTTP 200,
        otherwise ``None`` (the status code is printed).
    """
    image_bytes = process_image_(img_str)
    headers = {'Content-Type': 'application/json'}
    # Body of the OCR request.
    data = {'images': [cv2_to_base64(image_bytes.getvalue())]}
    payload = json.dumps(data)

    async def _post(client):
        # Send the request on ``client`` and unwrap the OCR result.
        async with client.post("http://192.168.0.189:8868/predict/ocr_system", data=payload,
                               headers=headers) as response:
            if response.status == 200:
                return (await response.json())["results"][0]
            print(f'Error: {response.status}')
            return None

    if session is not None:
        return await _post(session)
    async with aiohttp.ClientSession() as own_session:
        return await _post(own_session)


async def process_images(img_strs_list, max_concurrency=5):
    """OCR all images concurrently, with a cap on in-flight requests.

    Args:
        img_strs_list: Iterable of base64-encoded images.
        max_concurrency: Maximum number of requests running at the same time.

    Returns:
        list: One result per input image, in input order; ``None`` for images
        whose request did not return HTTP 200.
    """
    sem = asyncio.Semaphore(max_concurrency)

    async def _bounded(img_str, session):
        # The semaphore is held for the duration of the request itself, so it
        # actually limits concurrency. Holding it only while the tasks are
        # being created would limit nothing.
        async with sem:
            return await process_image(img_str, session)

    # A single session is shared by all requests; opening one session per
    # image is what caused errors once many requests were in flight.
    async with aiohttp.ClientSession() as session:
        tasks = [asyncio.create_task(_bounded(img_str, session)) for img_str in img_strs_list]
        results = await asyncio.gather(*tasks)
    return results
async def main():
    """Async entry point: read the images, run the async OCR tasks, save to async.csv."""
    encoded_images = read_data()
    ocr_results = await process_images(encoded_images)
    save_data(ocr_results, 'async.csv')
# Entry point. The async variant is left disabled; the author's recorded figure
# for it was 8.666 for 100 rows (presumably seconds, as printed below):
# asyncio.run(main())
main2()  # synchronous variant: 9.667 for 100 rows; 96.832 also recorded (presumably a larger run; TODO confirm)
# Report the wall-clock seconds elapsed since time1 was set.
print(f'当前页 共花费--> ', round(time.time() - time1, 3), '\n')  # author's recorded value: 1.813 (context not stated)
(异步与非异步的耗时差不多:100 条约 8.7 s 对 9.7 s)
结果