PaddleOCR v4 HubServing 部署

news2026/2/16 0:29:26

环境准备：https://github.com/PaddlePaddle/PaddleOCR/tree/release/2.7/deploy/hubserving#24-%E5%90%AF%E5%8A%A8%E6%9C%8D%E5%8A%A1
在这里插入图片描述

服务器启动命令

hub serving start -c deploy/hubserving/ocr_system/config.json

客户端请求
python tools/test_hubserving.py --server_url=server_url --image_dir=image_path

import base64
import sys
import time
import requests
import json
import asyncio
import aiohttp

import pandas as pd
from sqlalchemy import create_engine, text
import time


from PIL import Image
from io import BytesIO
import cv2
import numpy as np
import io

# Wall-clock start of the run, captured right after the imports; the final
# print at the bottom of the script reports the time elapsed since this point.
time1 = time.time()



def read_data(page=0, page_size=100):
    """Load one page of base64-encoded images from the ``xj_zsjh_png`` table.

    Args:
        page: Zero-based page index. Defaults to 0 (the first page).
        page_size: Number of rows fetched per page. Defaults to 100.

    Returns:
        A list holding the ``bas64_str`` column value of every fetched row.
    """
    # Coerce to int so that only integers are ever interpolated into the SQL.
    limit = int(page_size)
    # The offset advances by whole pages so consecutive pages never overlap.
    offset = int(page) * limit
    # Author's note on the original query: 1000 rows took about 100 s
    # (presumably for the whole OCR run -- TODO confirm).
    sql_select = f"SELECT * from xj_zsjh_png LIMIT {offset}, {limit}"

    engine = create_engine('mysql+pymysql://xxx:xxx@xxx.xxx.xx.xx:3306/x?charset=uxxxtf8')
    try:
        # The context manager returns the connection to the pool even when the
        # query raises.
        with engine.connect() as connection:
            results = pd.read_sql(sql=text(sql_select), con=connection)
    finally:
        # Release the pooled connections held by this one-off engine.
        engine.dispose()

    # Read the column directly rather than serialising every column to JSON.
    return results['bas64_str'].tolist()


def cv2_to_base64(image):
    """Return the base64 text representation of the bytes-like ``image``."""
    encoded = base64.b64encode(image)
    return str(encoded, 'utf8')

def save_data(results, file_name):
    """Write the recognised text of every image to a CSV file.

    Each entry of ``results`` becomes one CSV row and each item's ``'text'``
    value becomes one cell of that row.

    Args:
        results: One entry per image. An entry is an iterable of dicts that
            each carry a ``'text'`` key, or ``None`` for a request that
            produced no result.
        file_name: Path of the CSV file to write.
    """
    # A missing (None) or empty entry is kept as an empty row so that the row
    # order still matches the order of the input images.
    rows = [[item['text'] for item in (entry or ())] for entry in results]
    pd.DataFrame(rows).to_csv(file_name, index=False)

def process_image_(img_str, left=535, width=240):
    """Decode a base64 image, flatten it onto white, crop it and encode as JPEG.

    Args:
        img_str: Base64-encoded image data.
        left: X coordinate of the left edge of the crop window. Defaults to 535.
        width: Width of the crop window in pixels. Defaults to 240.

    Returns:
        An ``io.BytesIO`` holding the cropped image encoded as JPEG. Use
        ``getvalue()`` to obtain the bytes; the stream position is left at the
        end of the written data.
    """
    binary = base64.b64decode(img_str)
    # Normalise to RGBA so the image can act as its own paste mask; an image
    # without an alpha band would otherwise be rejected as a mask.
    image = Image.open(BytesIO(binary)).convert('RGBA')

    # Composite onto an opaque white background so transparent pixels become
    # white (JPEG cannot store an alpha channel).
    rgb_image = Image.new('RGB', image.size, (255, 255, 255))
    rgb_image.paste(image, (0, 0), mask=image)

    # Keep the full height and only the column band [left, left + width).
    top = 0
    right = left + width
    bottom = image.size[1]
    cropped_image = rgb_image.crop((left, top, right, bottom))

    # Encode in memory rather than writing a temporary file.
    image_bytes = io.BytesIO()
    cropped_image.save(image_bytes, format='JPEG')
    return image_bytes






def main2():
    """Run OCR over one page of images sequentially and save to 'normal.csv'.

    Every image returned by ``read_data`` is preprocessed, posted to the OCR
    service as a base64 JSON payload, and the first entry of the response's
    "results" list is collected. Images whose request fails are reported on
    stdout and skipped, so the CSV can have fewer rows than there were images.
    """
    url = "http://192.168.0.189:8868/predict/ocr_system"
    headers = {
        'Content-Type': 'application/json'
    }
    results = []
    # One Session keeps the HTTP connection alive across requests instead of
    # opening a new connection for every image.
    with requests.Session() as session:
        for img_str in read_data():
            image_bytes = process_image_(img_str)
            data = {'images': [cv2_to_base64(image_bytes.getvalue())]}
            try:
                # The timeout keeps a stalled server from hanging the run.
                response = session.post(url, data=json.dumps(data), headers=headers, timeout=60)
            except requests.RequestException as exc:
                # Report transport failures like HTTP errors and carry on, so
                # the results gathered so far are still saved.
                print('Error:', exc)
                continue

            if response.status_code == 200:
                results.append(response.json()["results"][0])
            else:
                print('Error:', response.status_code)
    save_data(results, 'normal.csv')

async def process_image(img_str, session=None):
    """Preprocess one base64 image and send it to the OCR service.

    Args:
        img_str: Base64-encoded source image.
        session: Optional ``aiohttp.ClientSession`` to send the request with.
            When omitted, a private session is opened for this single request
            and closed afterwards. Passing a shared session lets many requests
            reuse one connection pool.

    Returns:
        The first entry of the response's "results" list, or ``None`` when the
        service answers with a status other than 200.
    """
    if session is None:
        # No session supplied: open one just for this call and re-enter.
        async with aiohttp.ClientSession() as own_session:
            return await process_image(img_str, session=own_session)

    image_bytes = process_image_(img_str)
    headers = {
        'Content-Type': 'application/json'
    }
    # Build and send the OCR request.
    data = {'images': [cv2_to_base64(image_bytes.getvalue())]}
    async with session.post("http://192.168.0.189:8868/predict/ocr_system", data=json.dumps(data),
                            headers=headers) as response:
        if response.status != 200:
            print(f'Error: {response.status}')
            return None
        return (await response.json())["results"][0]


async def process_images(img_strs_list, max_concurrency=5):
    """Run OCR on every image concurrently, with a cap on in-flight requests.

    Args:
        img_strs_list: Iterable of base64-encoded images.
        max_concurrency: Maximum number of requests allowed to run at the same
            time. Defaults to 5. The original note warns that opening too many
            sessions at once leads to errors.

    Returns:
        A list with one ``process_image`` result per input image, in input
        order.

    Raises:
        ValueError: If ``max_concurrency`` is smaller than 1.
    """
    if max_concurrency < 1:
        raise ValueError("max_concurrency must be at least 1")

    sem = asyncio.Semaphore(max_concurrency)

    async def _bounded(img_str):
        # The semaphore is taken per request. Holding it once around the whole
        # batch would let every request start at the same time.
        async with sem:
            return await process_image(img_str)

    # gather() keeps the results in the same order as the inputs.
    return await asyncio.gather(*(_bounded(img_str) for img_str in img_strs_list))


# Entry point of the asynchronous client.
async def main():
    """Read one page of images, OCR them concurrently, save to 'async.csv'."""
    ocr_results = await process_images(read_data())
    save_data(ocr_results, 'async.csv')


# Swap which of the next two calls is commented out to choose the client.
# asyncio.run(main())  # 100 records: 8.666 (author's measurement, presumably seconds)
main2()  # 100 records: 9.667; the 96.832 figure is presumably the 1000-record run -- TODO confirm
elapsed = round(time.time() - time1, 3)
print(f'当前页 共花费--> ', elapsed, '\n')  # author's note: 1.813

（异步与非异步的耗时差不多：100 条数据分别约为 8.7 秒和 9.7 秒）
结果
在这里插入图片描述