YoloV9实战：从Labelme到训练、验证、测试、模块解析

模型实战

训练COCO数据集

本次使用2017版本的COCO数据集作为例子，演示如何使用YoloV9训练和预测。

下载数据集

Images:

2017 Train images [118K/18GB] ：http://images.cocodataset.org/zips/train2017.zip
2017 Val images [5K/1GB]：http://images.cocodataset.org/zips/val2017.zip
2017 Test images [41K/6GB]：http://images.cocodataset.org/zips/test2017.zip

Annotations:

2017 annotations_trainval2017 [241MB]：http://images.cocodataset.org/annotations/annotations_trainval2017.zip

COCO转yolo格式数据集（适用V4，V5，V6，V7，V8，V9）

最初的研究论文中，COCO中有91个对象类别。然而，在2014年的第一次发布中，仅发布了80个标记和分割图像的对象类别。2014年发布之后，2017年发布了后续版本。详细的类别如下：

ID	OBJECT (PAPER)	OBJECT (2014 REL.)	OBJECT (2017 REL.)	SUPER CATEGORY
1	person	person	person	person
2	bicycle	bicycle	bicycle	vehicle
3	car	car	car	vehicle
4	motorcycle	motorcycle	motorcycle	vehicle
5	airplane	airplane	airplane	vehicle
6	bus	bus	bus	vehicle
7	train	train	train	vehicle
8	truck	truck	truck	vehicle
9	boat	boat	boat	vehicle
10	traffic light	traffic light	traffic light	outdoor
11	fire hydrant	fire hydrant	fire hydrant	outdoor
12	street sign	-	-	outdoor
13	stop sign	stop sign	stop sign	outdoor
14	parking meter	parking meter	parking meter	outdoor
15	bench	bench	bench	outdoor
16	bird	bird	bird	animal
17	cat	cat	cat	animal
18	dog	dog	dog	animal
19	horse	horse	horse	animal
20	sheep	sheep	sheep	animal
21	cow	cow	cow	animal
22	elephant	elephant	elephant	animal
23	bear	bear	bear	animal
24	zebra	zebra	zebra	animal
25	giraffe	giraffe	giraffe	animal
26	hat	-	-	accessory
27	backpack	backpack	backpack	accessory
28	umbrella	umbrella	umbrella	accessory
29	shoe	-	-	accessory
30	eye glasses	-	-	accessory
31	handbag	handbag	handbag	accessory
32	tie	tie	tie	accessory
33	suitcase	suitcase	suitcase	accessory
34	frisbee	frisbee	frisbee	sports
35	skis	skis	skis	sports
36	snowboard	snowboard	snowboard	sports
37	sports ball	sports ball	sports ball	sports
38	kite	kite	kite	sports
39	baseball bat	baseball bat	baseball bat	sports
40	baseball glove	baseball glove	baseball glove	sports
41	skateboard	skateboard	skateboard	sports
42	surfboard	surfboard	surfboard	sports
43	tennis racket	tennis racket	tennis racket	sports
44	bottle	bottle	bottle	kitchen
45	plate	-	-	kitchen
46	wine glass	wine glass	wine glass	kitchen
47	cup	cup	cup	kitchen
48	fork	fork	fork	kitchen
49	knife	knife	knife	kitchen
50	spoon	spoon	spoon	kitchen
51	bowl	bowl	bowl	kitchen
52	banana	banana	banana	food
53	apple	apple	apple	food
54	sandwich	sandwich	sandwich	food
55	orange	orange	orange	food
56	broccoli	broccoli	broccoli	food
57	carrot	carrot	carrot	food
58	hot dog	hot dog	hot dog	food
59	pizza	pizza	pizza	food
60	donut	donut	donut	food
61	cake	cake	cake	food
62	chair	chair	chair	furniture
63	couch	couch	couch	furniture
64	potted plant	potted plant	potted plant	furniture
65	bed	bed	bed	furniture
66	mirror	-	-	furniture
67	dining table	dining table	dining table	furniture
68	window	-	-	furniture
69	desk	-	-	furniture
70	toilet	toilet	toilet	furniture
71	door	-	-	furniture
72	tv	tv	tv	electronic
73	laptop	laptop	laptop	electronic
74	mouse	mouse	mouse	electronic
75	remote	remote	remote	electronic
76	keyboard	keyboard	keyboard	electronic
77	cell phone	cell phone	cell phone	electronic
78	microwave	microwave	microwave	appliance
79	oven	oven	oven	appliance
80	toaster	toaster	toaster	appliance
81	sink	sink	sink	appliance
82	refrigerator	refrigerator	refrigerator	appliance
83	blender	-	-	appliance
84	book	book	book	indoor
85	clock	clock	clock	indoor
86	vase	vase	vase	indoor
87	scissors	scissors	scissors	indoor
88	teddy bear	teddy bear	teddy bear	indoor
89	hair drier	hair drier	hair drier	indoor
90	toothbrush	toothbrush	toothbrush	indoor
91	hair brush	-	-	indoor

可以看到，2014年和2017年发布的对象列表是相同的，它们是论文中最初91个对象类别中的80个对象。所以在转换的时候，要重新对类别做映射，映射函数如下：

def coco91_to_coco80_class():
    """Return the lookup table from COCO paper category ids to 80-class indices.

    Entry ``i`` of the returned 91-element list belongs to paper category id
    ``i + 1``. It holds the zero-based index of that category among the 80
    released classes, or ``None`` for the 11 paper categories that have no
    counterpart in the released datasets.
    """
    # Reference: https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/
    # Paper ids that do not appear in the 2014/2017 releases.
    unreleased = {12, 26, 29, 30, 45, 66, 68, 69, 71, 83, 91}
    table = []
    released_seen = 0
    for paper_id in range(1, 92):
        if paper_id in unreleased:
            table.append(None)
            continue
        table.append(released_seen)
        released_seen += 1
    return table

接下来，开始格式转换，工程的目录如下：
在这里插入图片描述

-coco：存放解压后的数据集。
-out：保存输出结果。
-coco2yolo.py：转换脚本。

转换代码如下：

import json
import glob
import os
import shutil
from pathlib import Path
import numpy as np
from tqdm import tqdm


def make_folders(path='../out/'):
    """Recreate an empty output directory tree and return its path.

    Anything already present at ``path`` is removed first. Afterwards ``path``
    exists and contains two empty subdirectories, ``labels`` and ``images``.
    """
    already_there = os.path.exists(path)
    if already_there:
        shutil.rmtree(path)  # start from a clean slate

    os.makedirs(path)
    for subdir in ('labels', 'images'):
        os.makedirs(path + os.sep + subdir)
    return path


def convert_coco_json(json_dir='./coco/annotations_trainval2017/annotations/'):
    """Convert COCO annotation JSON files to YOLO txt labels and copy the images.

    Every ``*.json`` file matched by ``json_dir + '*.json'`` is processed, so
    the directory should contain only detection ("instances") files: each
    annotation is expected to provide ``iscrowd``, ``image_id``, ``bbox`` and
    ``category_id``. For a file named ``instances_<split>.json`` the labels go
    to ``out/labels/<split>/`` and every image referenced by a non-crowd
    annotation is copied from ``coco/<split>/`` to ``out/images/<split>/``
    (all paths relative to the current working directory).

    Each label line is ``class x_center y_center width height`` with the box
    normalised by the image size. Label files are opened in append mode, so
    clear ``out/`` before re-running to avoid duplicated lines.

    Args:
        json_dir: Path prefix (including the trailing separator) of the
            directory holding the JSON files.
    """
    jsons = glob.glob(json_dir + '*.json')
    coco80 = coco91_to_coco80_class()

    for json_file in sorted(jsons):
        split = Path(json_file).stem.replace('instances_', '')  # e.g. 'val2017'
        fn = 'out/labels/%s/' % split  # label output folder
        fn_images = 'out/images/%s/' % split  # image output folder
        os.makedirs(fn, exist_ok=True)
        os.makedirs(fn_images, exist_ok=True)
        with open(json_file, encoding='utf-8') as json_fp:
            data = json.load(json_fp)
        print(fn)

        # Key by the raw id: formatting with '%g' keeps only 6 significant
        # digits, so ids >= 1,000,000 would collide with each other.
        images = {im['id']: im for im in data['images']}

        copied = set()  # file names already copied for this split
        for ann in tqdm(data['annotations'], desc='Annotations %s' % json_file):
            if ann['iscrowd']:
                continue

            img = images[ann['image_id']]
            h, w, name = img['height'], img['width'], img['file_name']

            # The COCO bounding box format is [top left x, top left y, width, height]
            box = np.array(ann['bbox'], dtype=np.float64)
            box[:2] += box[2:] / 2  # xy top-left corner to center
            box[[0, 2]] /= w  # normalize x
            box[[1, 3]] /= h  # normalize y

            if (box[2] > 0.) and (box[3] > 0.):  # if w > 0 and h > 0
                with open(fn + Path(name).stem + '.txt', 'a') as file:
                    file.write('%g %.6f %.6f %.6f %.6f\n' % (coco80[ann['category_id'] - 1], *box))

            # Copy each image once, not once per annotation that refers to it.
            if name not in copied:
                copied.add(name)
                file_path = 'coco/' + split + '/' + name
                file_path_t = fn_images + name
                print(file_path, file_path_t)
                shutil.copy(file_path, file_path_t)


def coco91_to_coco80_class():
    """Map 91-index COCO paper category ids onto the 80-class index space.

    Returns a list of 91 entries. Position ``i`` describes paper category id
    ``i + 1`` and contains either its zero-based index among the 80 released
    classes or ``None`` when that paper category has no released counterpart.
    """
    # Reference: https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/
    skipped = frozenset((12, 26, 29, 30, 45, 66, 68, 69, 71, 83, 91))
    paper_ids = range(1, 92)
    kept = [cid for cid in paper_ids if cid not in skipped]
    index_of = {cid: idx for idx, cid in enumerate(kept)}
    # dict.get yields None for the skipped ids, which is the wanted placeholder.
    return [index_of.get(cid) for cid in paper_ids]

# Script entry point: run the conversion with the default annotation directory.
convert_coco_json()

开始运行：
在这里插入图片描述

转换完成后，验证转换的结果：

import cv2
import os

def draw_box_in_single_image(image_path, txt_path):
    """Draw the YOLO-format boxes from ``txt_path`` onto an image and save it.

    Each non-blank line of the label file is read as whitespace-separated
    floats ``class x_center y_center width height``, with the box values
    normalised to the image size. Every box is drawn in red with its class
    index as caption, and the result is written to
    ``./Data/see_images/<image name>.png`` (the directory must already exist).
    Nothing is written when the label file contains no boxes.

    Args:
        image_path: Path of the image to annotate; both ``/`` and ``\\`` are
            accepted as separators when deriving the output file name.
        txt_path: Path of the matching YOLO label file.
    """

    def read_list(txt_path):
        """Parse the label file into a list of float rows, skipping blank lines."""
        rows = []
        with open(txt_path, 'r') as label_file:
            for line in label_file:
                # split() without an argument tolerates repeated spaces, tabs
                # and the trailing newline.
                fields = line.split()
                if fields:
                    rows.append([float(value) for value in fields])
        return rows

    def convert(size, box):
        """Turn a normalised (cls, xc, yc, w, h) row into pixel corner coordinates.

        ``size`` is the image shape, i.e. (height, width, ...).
        """
        xmin = (box[1] - box[3] / 2.) * size[1]
        xmax = (box[1] + box[3] / 2.) * size[1]
        ymin = (box[2] - box[4] / 2.) * size[0]
        ymax = (box[2] + box[4] / 2.) * size[0]
        return (int(xmin), int(ymin), int(xmax), int(ymax))

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals a missing or undecodable file by returning None.
        print('cannot read image: ' + image_path)
        return

    pos = read_list(txt_path)
    print(pos)
    for row in pos:
        label = str(int(row[0]))
        print('label is ' + label)
        box = convert(image.shape, row)
        image = cv2.rectangle(image, (box[0], box[1]), (box[2], box[3]), (0, 0, 255), 2)
        cv2.putText(image, label, (box[0], box[1] - 2), 0, 1, [0, 0, 255], thickness=2, lineType=cv2.LINE_AA)

    if pos:
        # Normalise separators first so the name is extracted correctly on any
        # platform, and use splitext so extensions of any length are handled.
        file_name = os.path.basename(image_path.replace('\\', '/'))
        out_path = './Data/see_images/{}.png'.format(os.path.splitext(file_name)[0])
        if not cv2.imwrite(out_path, image):
            print('failed to write ' + out_path)
    else:
        print('None')



# Visualise every converted validation image that has a matching label file.
img_folder = "./out/images/val2017"
img_list = os.listdir(img_folder)
img_list.sort()

label_folder = "./out/labels/val2017"
label_list = os.listdir(label_folder)
label_list.sort()
os.makedirs('./Data/see_images', exist_ok=True)

# Pair each image with its label by file stem rather than by list position:
# the two sorted listings fall out of step as soon as one image has no label
# file, which would draw boxes on the wrong image or raise IndexError.
available_labels = set(label_list)
for img_name in img_list:
    label_name = os.path.splitext(img_name)[0] + '.txt'
    if label_name not in available_labels:
        print('no label file for ' + img_name)
        continue
    image_path = os.path.join(img_folder, img_name)
    txt_path = os.path.join(label_folder, label_name)
    draw_box_in_single_image(image_path, txt_path)