YOLOv5 / YOLOv8 系列：labelme 数据标注与数据集生成终极教程
- 一.数据集准备
- 二.转换为yolo 数据集
一.数据集准备
- 创建一个data 文件夹
- 在data文件夹下创建一个images 文件夹
- 将所有图片数据放入images文件夹下
使用labelme标注数据
- python环境下使用
pip install labelme
安装 labelme
- 在 cmd 中使用命令
labelme
命令打开软件
- 进行标注
- 将标注文件和原图都放在images文件夹
标注完成后 images 文件夹下 存在原图和标注的json 文件
二.转换为yolo 数据集
在 data 文件夹下，根据 labelme 标注时使用的标签创建类别文件 classes.txt（每行一个类别名称，行的顺序即类别编号，从 0 开始）
然后在 data 文件夹下运行 makedataset.py（脚本使用相对路径读取 images 和 classes.txt），直接生成 labels 标签文件夹以及 train.txt、val.txt 文件
import json
import cv2
import numpy as np
import glob
import os
def split_by_ratio(arr, *ratios):
    """
    Shuffle ``arr`` and cut it into consecutive chunks sized by ``ratios``.

    :param arr: the items to split; a randomly permuted copy is made with
        ``np.random.permutation`` before cutting.
    :param ratios: one fraction per desired chunk, e.g. ``0.5, 0.5`` yields
        two chunks holding 50% of the items each.
    :return: a list of ``len(ratios)`` Python lists, in the order of ``ratios``.
    """
    shuffled = np.random.permutation(arr)
    total = len(shuffled)
    # The running sum of the chunk sizes, truncated to int, gives the cut positions.
    cut_points = np.cumsum(np.array(ratios) * total).astype(int)
    pieces = np.split(shuffled, cut_points)
    # np.split also returns whatever follows the last cut; keep one piece per ratio.
    return [piece.tolist() for piece in pieces[:len(ratios)]]
# Read an image whose path may contain Chinese (non-ASCII) characters.
def cv_imread(file_path):
    """
    Load the image stored at ``file_path``.

    The file is first read into a ``uint8`` buffer with ``np.fromfile`` and
    then decoded from memory with ``cv2.imdecode``, so the path itself is
    never handed to OpenCV.

    :param file_path: path of the image file to read.
    :return: whatever ``cv2.imdecode`` returns for the file's bytes.
    """
    raw_bytes = np.fromfile(file_path, dtype=np.uint8)
    # Flag -1 asks OpenCV to decode the image as stored (IMREAD_UNCHANGED).
    decoded = cv2.imdecode(raw_bytes, -1)
    return decoded
def labelme_shape_to_yolo(shape, class_names, img_width, img_height):
    """
    Convert one labelme shape into a single YOLO label line.

    :param shape: a labelme shape dict providing ``'label'`` and ``'points'``
        (a list of ``[x, y]`` pixel coordinates).
    :param class_names: list of class names; the position of the label in
        this list becomes the YOLO class id.
    :param img_width: width of the annotated image in pixels.
    :param img_height: height of the annotated image in pixels.
    :return: ``"<class_id> <x_center> <y_center> <width> <height>\\n"`` with
        all four box values normalised by the image size.
    :raises ValueError: if the label is not in ``class_names`` or the shape
        has fewer than two points.
    """
    label = shape['label']
    if label not in class_names:
        raise ValueError(f"Error: {label} not found in {class_names}")
    class_id = class_names.index(label)

    points = shape['points']
    if len(points) < 2:
        raise ValueError(
            f"Error: shape '{label}' needs at least 2 points, got {len(points)}"
        )

    # Use the bounding box of every point: for a two-corner rectangle this is
    # the rectangle itself (in any corner order), and a polygon is boxed too.
    xs = [point[0] for point in points]
    ys = [point[1] for point in points]
    x_min, x_max = min(xs), max(xs)
    y_min, y_max = min(ys), max(ys)

    x_center = (x_min + x_max) / 2 / img_width
    y_center = (y_min + y_max) / 2 / img_height
    box_width = (x_max - x_min) / img_width
    box_height = (y_max - y_min) / img_height
    return f"{class_id} {x_center} {y_center} {box_width} {box_height}\n"


def write_yolo_split(json_files, list_path, class_names):
    """
    Write YOLO label files for ``json_files`` and list their images.

    For every labelme JSON file a ``labels/<basename>.txt`` file is written
    with one line per shape, and ``data/images/<basename>.jpg`` is appended
    to ``list_path``.

    :param json_files: paths of the labelme JSON annotation files.
    :param list_path: path of the image-list file to create (e.g. train.txt).
    :param class_names: list of class names used to look up class ids.
    :raises ValueError: propagated from ``labelme_shape_to_yolo``.
    """
    with open(list_path, 'w') as list_file:
        for json_path in json_files:
            # splitext only removes the final ".json", so names containing
            # other dots keep their full stem.
            basename = os.path.splitext(os.path.basename(json_path))[0]

            with open(json_path, 'r', encoding='utf-8') as json_file:
                data = json.load(json_file)

            # The image size is read once per file and kept in its own
            # variables so every shape is normalised by the pixel dimensions.
            img_width = data["imageWidth"]
            img_height = data["imageHeight"]
            label_lines = [
                labelme_shape_to_yolo(shape, class_names, img_width, img_height)
                for shape in data['shapes']
            ]

            with open("labels/" + basename + ".txt", 'w') as label_file:
                label_file.writelines(label_lines)

            # NOTE(review): the image extension is hard-coded to ".jpg";
            # confirm the dataset contains no other image formats.
            list_file.write("data/images/" + basename + ".jpg\n")


def main():
    """
    Build the YOLO dataset files from the labelme annotations.

    Reads ``images/*.json`` and ``classes.txt`` relative to the current
    working directory, then writes ``labels/*.txt``, ``train.txt`` and
    ``val.txt`` using a 90% / 10% random split.
    """
    # Annotation files; split_by_ratio shuffles them before splitting.
    json_list = glob.glob("images/*.json")
    trains, vals = split_by_ratio(json_list, 0.9, 0.1)

    # Output folder for the YOLO label files.
    os.makedirs("labels", exist_ok=True)

    # Class names: one per line, the line index is the class id.
    with open("classes.txt", encoding='utf-8') as class_file:
        class_names = [line.strip() for line in class_file]

    write_yolo_split(trains, 'train.txt', class_names)
    write_yolo_split(vals, 'val.txt', class_names)


if __name__ == "__main__":
    main()