一.数据集准备

创建一个data 文件夹
在data文件夹下创建一个images 文件夹
将所有图片数据放入images文件夹下

使用labelme标注数据

python环境下使用 pip install labelme 安装labelme
在 cmd 中输入 labelme 命令打开软件
进行标注
将标注文件和原图都放在images文件夹

标注完成后 images 文件夹下存在原图和标注的json 文件

(此处原为示意图:images 文件夹中同时存放原图和同名的 json 标注文件)

二.转换为yolo 数据集

在 data 文件夹下,根据 labelme 标注时使用的标签创建类别文件 classes.txt(每行一个类别名,行号从 0 开始即为该类别的 id)
然后在 data 文件夹下运行 makedataset.py,直接生成 labels 标签文件夹以及 train.txt、val.txt 两个列表文件

import json
import cv2
import numpy as np
import glob
import os

def split_by_ratio(arr, *ratios):
    """
    Randomly shuffle ``arr`` and cut it into consecutive sub-lists.

    :param arr: sequence of items to split.
    :param ratios: one fraction per requested sub-list, e.g. ``0.5, 0.5``
        yields two sub-lists of 50% each. The number of ratios is the
        number of sub-lists returned.
    :return: list with ``len(ratios)`` plain Python lists. Cut positions are
        the cumulative ratios times ``len(arr)``, truncated to integers, so
        any items beyond the last cut position are dropped.
    """
    shuffled = np.random.permutation(arr)
    # Cumulative ratio * length gives the index at which each piece ends.
    cut_points = np.cumsum(np.array(ratios) * len(arr)).astype(int)
    pieces = np.split(shuffled, cut_points)
    # np.split returns one extra trailing piece after the last cut; drop it.
    wanted = pieces[:len(ratios)]
    return [piece.tolist() for piece in wanted]

# Read an image from a path that may contain Chinese characters.
def cv_imread(file_path):
    """
    Load an image by reading the file's raw bytes and decoding them in memory.

    :param file_path: path of the image file to read.
    :return: the result of ``cv2.imdecode`` on the file contents.
    """
    raw_bytes = np.fromfile(file_path, dtype=np.uint8)
    # Flag -1 is passed through unchanged from the original call.
    return cv2.imdecode(raw_bytes, -1)

def shape_to_yolo_line(shape, class_names, image_width, image_height):
    """
    Convert one labelme shape into a single YOLO label line.

    The box is the axis-aligned bounding box of all points of the shape, so
    a two-point rectangle gives the same result regardless of corner order
    and shapes with more points (e.g. polygons) are enclosed completely.

    :param shape: labelme shape dict with ``'label'`` and ``'points'`` keys.
    :param class_names: ordered list of class names; the index is the class id.
    :param image_width: image width in pixels, used to normalise x values.
    :param image_height: image height in pixels, used to normalise y values.
    :return: ``"<class_id> <x_center> <y_center> <width> <height>\\n"`` with
        all four box values divided by the image size.
    :raises ValueError: if the label is not in ``class_names`` or the shape
        has fewer than two points.
    """
    label = shape['label']
    # Explicit raise instead of assert: asserts are stripped under ``python -O``.
    if label not in class_names:
        raise ValueError(f"Error: {label} not found in {class_names}")
    class_id = class_names.index(label)

    points = shape['points']
    if len(points) < 2:
        raise ValueError(
            f"Error: shape '{label}' needs at least 2 points, got {len(points)}"
        )
    xs = [point[0] for point in points]
    ys = [point[1] for point in points]
    x_min, x_max = min(xs), max(xs)
    y_min, y_max = min(ys), max(ys)

    # Keep the image size and the box size in separate names; reusing
    # ``width``/``height`` for both is what corrupted the val labels before.
    x_center = (x_min + x_max) / 2 / image_width
    y_center = (y_min + y_max) / 2 / image_height
    box_width = (x_max - x_min) / image_width
    box_height = (y_max - y_min) / image_height
    return f"{class_id} {x_center} {y_center} {box_width} {box_height}\n"


def convert_split(json_files, list_path, class_names, label_dir="labels",
                  image_prefix="data/images/", image_ext=".jpg"):
    """
    Write YOLO label files for one dataset split plus its image list file.

    For every labelme JSON file a ``<label_dir>/<basename>.txt`` label file
    is written, and ``<image_prefix><basename><image_ext>`` is appended as
    one line to ``list_path``.

    :param json_files: paths of the labelme JSON files in this split.
    :param list_path: path of the list file to create (e.g. ``train.txt``).
    :param class_names: ordered list of class names; the index is the class id.
    :param label_dir: directory that receives the label ``.txt`` files.
    :param image_prefix: prefix written before each image name in the list.
    :param image_ext: image extension written in the list file.
    :raises ValueError: propagated from ``shape_to_yolo_line``.
    """
    with open(list_path, 'w') as list_file:
        for json_path in json_files:
            # Accept both "/" and "\\" separators, then strip only the final
            # extension so names such as "img.v2.json" keep their full stem.
            file_name = json_path.replace("\\", "/").rsplit("/", 1)[-1]
            basename = os.path.splitext(file_name)[0]

            with open(json_path, 'r', encoding='utf-8') as json_file:
                data = json.load(json_file)

            with open(os.path.join(label_dir, basename + ".txt"), 'w') as label_file:
                for shape in data['shapes']:
                    # Image size is read from this file's own JSON for every
                    # split (the val split previously reused stale values).
                    label_file.write(shape_to_yolo_line(
                        shape, class_names,
                        data["imageWidth"], data["imageHeight"],
                    ))

            list_file.write(image_prefix + basename + image_ext + "\n")


if __name__ == "__main__":
    # Annotation files; split_by_ratio shuffles them, so no extra shuffle here.
    json_list = glob.glob("images/*.json")
    trains, vals = split_by_ratio(json_list, 0.9, 0.1)

    # Output directory for the YOLO label files.
    os.makedirs("labels", exist_ok=True)

    # Class names: one per line, the line index is the YOLO class id.
    with open("classes.txt", encoding='utf-8') as class_file:
        class_names = [line.strip() for line in class_file]

    convert_split(trains, 'train.txt', class_names)
    convert_split(vals, 'val.txt', class_names)