1.标注数据集
使用labelme进行数据集标注,首先进行labelme安装:
pip install labelme
然后用labelme进行数据标注。
2.数据集格式转换
将labelme生成的json标注转换为yolov8-seg所需的txt格式,并按比例划分为train、val两个子集(下面的脚本不会生成test子集)。
数据集格式转换脚本:
# labelme2yolo-seg
# -*- coding:utf-8 -*-
import os
import json
import random
import shutil
import argparse
from tqdm import tqdm
def convert_label_json(json_dir, save_dir, classes):
    """Convert labelme JSON annotations into YOLO segmentation label files.

    For every ``*.json`` file in ``json_dir`` a ``.txt`` file with the same
    base name is written to ``save_dir``. Each shape whose label appears in
    ``classes`` becomes one line of the form
    ``<class index> x1 y1 x2 y2 ...`` where every x is divided by the
    ``imageWidth`` and every y by the ``imageHeight`` stored in the JSON.
    Shapes with an unknown label are reported and skipped.

    Args:
        json_dir: Directory containing the labelme JSON files.
        save_dir: Directory that receives the TXT files; created if missing.
        classes: Comma-separated class names; a label's position in this
            list is the class index written to the TXT file.
    """
    class_names = classes.split(',')
    os.makedirs(save_dir, exist_ok=True)

    # Only annotation files are converted; anything else in the directory
    # (images, sub-folders, ...) would make json.load fail.
    json_names = [
        entry for entry in os.listdir(json_dir)
        if entry.lower().endswith('.json')
    ]

    for json_name in tqdm(json_names):
        # labelme writes its JSON as UTF-8, so do not rely on the locale
        # default encoding when reading it back.
        with open(os.path.join(json_dir, json_name), 'r', encoding='utf-8') as load_f:
            json_dict = json.load(load_f)
        h, w = json_dict['imageHeight'], json_dict['imageWidth']

        # Swap only the extension; a plain str.replace('json', 'txt') would
        # also rewrite "json" occurring inside the base name.
        txt_name = os.path.splitext(json_name)[0] + '.txt'
        with open(os.path.join(save_dir, txt_name), 'w') as txt_file:
            for shape_dict in json_dict['shapes']:
                label = shape_dict['label']
                if label not in class_names:
                    print(f"Warning: Label '{label}' not found in classes.")
                    continue
                label_index = class_names.index(label)

                # Flatten [[x, y], ...] into normalised x1 y1 x2 y2 ...
                points_nor_list = []
                for point in shape_dict['points']:
                    points_nor_list.append(str(point[0] / w))
                    points_nor_list.append(str(point[1] / h))
                points_nor_str = ' '.join(points_nor_list)
                txt_file.write(f"{label_index} {points_nor_str}\n")
def data_split(full_list, ratio):
    """Randomly split ``full_list`` into two sub-lists.

    Args:
        full_list: Items to split. The input itself is left untouched; the
            shuffle is done on a copy.
        ratio: Fraction of the items that goes into the first sub-list.

    Returns:
        A tuple ``(sublist_1, sublist_2)``. ``sublist_1`` holds
        ``int(len(full_list) * ratio)`` randomly chosen items and
        ``sublist_2`` the remaining ones. When that count is below 1
        (including empty input), ``sublist_1`` is empty and ``sublist_2``
        holds every item in its original order.
    """
    # Work on a copy so the caller's list is neither reordered nor aliased.
    items = list(full_list)
    offset = int(len(items) * ratio)
    if offset < 1:
        return [], items
    random.shuffle(items)
    return items[:offset], items[offset:]
def create_directories(base_dir, sub_dirs):
    """Create every directory in ``sub_dirs`` underneath ``base_dir``.

    Missing parent directories (including ``base_dir`` itself) are created as
    well, and directories that already exist are left as they are.

    Args:
        base_dir: Directory under which the sub-directories are created.
        sub_dirs: Iterable of sub-directory names.
    """
    for sub_dir in sub_dirs:
        # makedirs instead of mkdir: base_dir may not exist yet.
        os.makedirs(os.path.join(base_dir, sub_dir), exist_ok=True)
def split_dataset(images_dir, labels_dir, train_p, val_p, proportion_):
    """Copy image/label pairs into the train and validation directories.

    The regular files in ``images_dir`` are split at random with
    ``data_split``: a fraction ``proportion_`` goes to ``train_p`` and the
    rest to ``val_p``. Each file is copied to ``<target>/images`` and the
    label file with the same base name is copied from ``labels_dir`` to
    ``<target>/labels``. Files without a matching label are reported and
    skipped. The ``images`` and ``labels`` sub-directories of both targets
    must already exist.

    Args:
        images_dir: Directory containing the source images.
        labels_dir: Directory containing the ``.txt`` label files.
        train_p: Training set directory.
        val_p: Validation set directory.
        proportion_: Fraction of the files assigned to the training set.
    """
    # Sub-directories of images_dir are not images, so they must not take
    # part in the split.
    total_file = [
        entry for entry in os.listdir(images_dir)
        if os.path.isfile(os.path.join(images_dir, entry))
    ]

    train_indices, _ = data_split(list(range(len(total_file))), proportion_)
    # Set membership keeps the loop linear; every index that is not in the
    # training part belongs to the validation part.
    train_indices = set(train_indices)

    for i, file in enumerate(total_file):
        # splitext drops only the extension, so "img.v1.jpg" keeps "img.v1".
        name = os.path.splitext(file)[0]
        target_dir = train_p if i in train_indices else val_p

        jpg_1 = os.path.join(images_dir, file)
        txt_1 = os.path.join(labels_dir, name + '.txt')
        jpg_2 = os.path.join(target_dir, 'images', file)
        txt_2 = os.path.join(target_dir, 'labels', name + '.txt')

        if os.path.exists(txt_1) and os.path.exists(jpg_1):
            shutil.copyfile(jpg_1, jpg_2)
            shutil.copyfile(txt_1, txt_2)
        else:
            print(f"File not found: {jpg_1} or {txt_1}")
if __name__ == "__main__":
    # One (flag, type, default, help) entry per command-line option, in the
    # order they are registered with the parser.
    option_specs = [
        ('--json-dir', str, r'C:\Users\WYX\Desktop\code\matlab-code\LW\mmsegmentation\images\json', 'JSON path dir'),
        ('--save-dir', str, r'C:\Users\WYX\Desktop\code\matlab-code\LW\mmsegmentation\images\txt', 'TXT save dir'),
        ('--classes', str, 'whistlewave', 'Comma-separated classes'),
        ('--images-dir', str, r'C:\Users\WYX\Desktop\code\matlab-code\LW\mmsegmentation\images', 'Images path dir'),
        ('--train-dir', str, r'C:\Users\WYX\Desktop\code\matlab-code\LW\mmsegmentation\Dataset\train', 'Train directory'),
        ('--val-dir', str, r'C:\Users\WYX\Desktop\code\matlab-code\LW\mmsegmentation\Dataset\val', 'Validation directory'),
        ('--proportion', float, 0.9, 'Train proportion'),
    ]

    parser = argparse.ArgumentParser(description='Convert JSON to TXT and split datasets')
    for flag, value_type, default_value, help_text in option_specs:
        parser.add_argument(flag, type=value_type, default=default_value, help=help_text)
    args = parser.parse_args()

    # Step 1: labelme JSON -> YOLO TXT labels.
    convert_label_json(args.json_dir, args.save_dir, args.classes)

    # Step 2: make sure both targets have their images/ and labels/ folders.
    for target_dir in (args.train_dir, args.val_dir):
        create_directories(target_dir, ['images', 'labels'])

    # Step 3: distribute the image/label pairs over train and val.
    split_dataset(args.images_dir, args.save_dir, args.train_dir, args.val_dir, args.proportion)
    print("数据集划分完成")
3.下载yolov8模型
在ultralytics官方GitHub仓库下载yolov8源码和yolov8n-seg.pt权重文件:https://github.com/ultralytics/ultralytics
4.配置文件
将ultralytics/cfg/datasets目录下的coco128-seg.yaml复制一份到ultralytics-main根目录,并重命名为whistlewave-seg.yaml;
然后将文件内容改成自己的数据路径:
# Ultralytics YOLO 🚀, AGPL-3.0 license
# whistlewave-seg dataset config, adapted from the Ultralytics COCO128-seg template (coco128-seg.yaml)
# Documentation: https://docs.ultralytics.com/datasets/segment/coco/
# Example usage: yolo task=segment mode=train data=whistlewave-seg.yaml
# Each split directory is expected to contain:
# train (or val)
# ├── images
# └── labels

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
path: C:/Users/WYX/Desktop/code/matlab-code/LW/mmsegmentation/ultralytics-main/dataset # dataset root dir
train: C:/Users/WYX/Desktop/code/matlab-code/LW/mmsegmentation/ultralytics-main/dataset/train # train images (absolute path here; may also be given relative to 'path')
val: C:/Users/WYX/Desktop/code/matlab-code/LW/mmsegmentation/ultralytics-main/dataset/val # val images (absolute path here; may also be given relative to 'path')
test: C:/Users/WYX/Desktop/code/matlab-code/LW/mmsegmentation/ultralytics-main/dataset/test # test images (optional)

# Classes (the entries must be nested under 'names')
names:
  0: whistlewave

# Download script/URL (optional)
# Disabled: the URL inherited from the template is the COCO128-seg archive, which is unrelated to this custom dataset.
# download: https://github.com/ultralytics/assets/releases/download/v0.0.0/coco128-seg.zip
5.训练
yolo task=segment mode=train model=C:\Users\WYX\Desktop\code\matlab-code\LW\mmsegmentation\ultralytics-main\yolov8n-seg.pt data=C:\Users\WYX\Desktop\code\matlab-code\LW\mmsegmentation\ultralytics-main\whistlewave-seg.yaml batch=2 imgsz=640 epochs=300 pretrained=True mosaic=0.0
6.验证
yolo task=segment mode=val model=C:\Users\WYX\Desktop\code\matlab-code\LW\mmsegmentation\ultralytics-main\runs\segment\train\weights\best.pt data=C:\Users\WYX\Desktop\code\matlab-code\LW\mmsegmentation\ultralytics-main\whistlewave-seg.yaml
7.预测
yolo task=segment mode=predict model=C:\Users\WYX\Desktop\code\matlab-code\LW\mmsegmentation\ultralytics-main\runs\segment\train\weights\best.pt source=C:\Users\WYX\Desktop\code\matlab-code\LW\mmsegmentation\ultralytics-main\ultralytics\assets