前言

安装了 PyTorch 框架以后，就可以尝试一些基于它的开源深度学习项目了，比如 YOLOv8。关于如何安装 PyTorch 框架，可以参考上一篇文章《pytorch深度学习框架CUDA版本环境安装记录》。
经过一番改造，我用 YOLOv8 做了一个实时视频物体检测程序。结果显示，YOLOv8 的实时性非常不错，可以用于机器人等对实时性有要求的场景。
在这里插入图片描述

安装YOLO8

网上已有很多安装教程，过程非常简单：安装好 PyTorch 框架后，直接执行以下指令即可：
pip install ultralytics
详细说明见作者的 GitHub 地址。
安装过程的最后可能会报一些依赖错误，
例如 numpy 版本不符之类。我没有理会，似乎对运行没有太大影响。

安装YOLO8的模型

模型可以直接从网上下载，渠道有很多（见模型下载地址链接）。
本示例用的是 yolov8n.pt，请把模型文件下载到本地文件夹。
在这里插入图片描述

程序源代码

之前写过一篇 YOLOv3 的工具代码，本程序与之基本相同：界面使用 Python 的 UI 框架 PySimpleGUI（非常好用，关于它的介绍可以参考博文《python机器人视觉编程——入门篇（下）》），图像处理使用 OpenCV 库。程序的全部代码如下：

# -*- coding: utf-8 -*-
"""
Created on Thu Apr 27 10:11:05 2023

@author: JAMES FEI <https://blog.csdn.net/kanbide>
Copyright (C) 2021 FEI PANFENG, All rights reserved.
THIS SOFTWARE, INCLUDING DOCUMENTATION, IS PROTECTED BY COPYRIGHT CONTROLLED
BY FEI PANFENG ALL RIGHTS ARE RESERVED.
"""

from ultralytics import YOLO
import cv2
import time
import PySimpleGUI as sg
import numpy as np

# Load the YOLO model from the "yolov8n.pt" weights file.
model = YOLO("yolov8n.pt")  # load an official model

# Alternative input left by the author: read a still image instead of a video source.
#cap=cv2.imread("test.png")
# Open video capture device 0 as the frame source (presumably the default camera).
cap=cv2.VideoCapture(0)
# Run one prediction up front; its result is used below to obtain the class labels.
# NOTE(review): this needs a readable "test.png" in the working directory - confirm
# that the file ships with the script.
results = model("test.png")  # predict on an image
# Class labels, read from the `names` attribute of the first result.
lables=results[0].names
def resizeoutput(output,maxw=600,maxh=500):
    """Shrink an image so that it fits inside a ``maxw`` x ``maxh`` box.

    The aspect ratio is preserved and an image that already fits is never
    enlarged.

    Args:
        output: Image array; ``shape[0]`` is read as the height and
            ``shape[1]`` as the width.
        maxw: Maximum width of the returned image, in pixels.
        maxh: Maximum height of the returned image, in pixels.

    Returns:
        A ``(pic, ratio)`` tuple. ``pic`` is the result of ``cv2.resize`` and
        ``ratio`` is the scale factor that was applied (``1`` when the image
        was not shrunk).

    Raises:
        ValueError: If the image has zero height or zero width.
    """
    height = output.shape[0]
    width = output.shape[1]
    if height <= 0 or width <= 0:
        raise ValueError("cannot resize an empty image")

    # Scale by the tighter of the two constraints so that BOTH limits hold.
    # Looking only at the longer side lets the other side overflow its limit
    # (e.g. a square image with maxw > maxh would come out taller than maxh).
    ratio = min(maxw / width, maxh / height)
    if ratio >= 1:
        # Already fits: keep the size, never upscale.
        ratio = 1
        new_w, new_h = width, height
    else:
        # Clamp to [1, limit] so rounding can neither exceed a limit nor
        # produce a zero-sized dimension for extreme aspect ratios.
        new_w = max(1, int(min(maxw, round(width * ratio))))
        new_h = max(1, int(min(maxh, round(height * ratio))))

    pic = cv2.resize(output, (new_w, new_h), interpolation=cv2.INTER_LINEAR)
    return pic, ratio

def getboxs(yoloresult,lables=lables):
    """Collect the detections of the first result as three parallel lists.

    Args:
        yoloresult: Indexable detection results. Only element 0 is read,
            through its ``boxes`` attribute (``cls``, ``conf`` and ``xyxy``).
        lables: Lookup from integer class id to class name.

    Returns:
        ``[boxes, names, confidences]`` where ``boxes`` comes from ``xyxy``,
        ``names`` holds the label of each detection and ``confidences`` comes
        from ``conf``; an empty list when there are no boxes.
    """
    detections = yoloresult[0].boxes

    # Translate every class id into its label.
    names = [lables[int(class_id)] for class_id in detections.cls.tolist()]
    scores = detections.conf.tolist()
    corners = detections.xyxy.tolist()

    if not corners:
        return []
    return [corners, names, scores]
def drawbox(img,box,filte=0.5):
    """Draw a labelled rectangle on ``img`` for each confident detection.

    Args:
        img: Image handed to the OpenCV drawing calls.
        box: ``[boxes, names, confidences]`` as three parallel sequences,
            each box being four coordinates ``x, y, x1, y1``. Anything that
            is not exactly a ``list``, or an empty list, is ignored.
        filte: Minimum confidence a detection needs in order to be drawn.
    """
    # Exact type match (not isinstance): only plain lists are processed.
    if type(box) is not list:
        return
    if not box:
        return

    for idx, corners in enumerate(box[0]):
        left, top, right, bottom = corners
        left, top = int(left), int(top)
        right, bottom = int(right), int(bottom)
        label = box[1][idx]
        score = box[2][idx]
        if not score >= filte:
            continue
        caption = "{}: {:.4f}".format(label, score)
        # Caption sits 5 px above the top-left corner of the rectangle.
        cv2.putText(img, caption, (left, top - 5), cv2.FONT_ITALIC, 0.5, [0, 255, 0], 2)
        cv2.rectangle(img, (left, top), (right, bottom), (255,255,0), 2)

def video_viewer(cap,model=model):
    """Show a window with live detections until the user exits.

    Each loop iteration reads one frame from ``cap``, shrinks it, runs
    ``model`` on it, draws the detections and displays the result.

    Args:
        cap: Frame source offering ``read()`` (returning ``(ok, frame)``) and
            ``release()``. It is released when the viewer ends.
        model: Callable that takes a frame and returns the detection results
            consumed by ``getboxs``.
    """
    layout = [
        [sg.Text(size=(15,1), key='-OUTPUT-')],
        [sg.Image(filename='', key='-IMAGE-')],
        [sg.Button('Exit')],
    ]
    win = sg.Window('YOLO视频检测', layout)

    try:
        while True:
            event, _ = win.read(timeout=100)
            # Handle exit before touching any element: once the window has
            # been closed its elements must not be updated any more.
            if event is None or event == 'Exit':
                break

            ret, frame = cap.read()
            if not ret:
                # No frame delivered; there is nothing to encode or show,
                # so keep the previous picture and poll again.
                continue

            frame, _ = resizeoutput(frame, maxh=400)
            results = model(frame)
            drawbox(frame, getboxs(results))
            imgbytes = cv2.imencode('.png', frame)[1].tobytes()
            win['-IMAGE-'].update(data=imgbytes)
    finally:
        # Always free the window and the capture, even if a frame fails.
        win.close()
        cap.release()
# Start the viewer on the capture source opened at module level.
video_viewer(cap)