YOLO分割数据集转化（json转TXT）

news 2025/7/13 23:08:41

一、数据集转化

import json
import os
from tqdm import tqdm
import glob
import os.path as osp

def json_to_txt(jsonfilePath, resultDirPath):
    """Convert labelme ``*.json`` annotations into YOLO segmentation ``*.txt`` labels.

    For every ``*.json`` file directly inside ``jsonfilePath`` a text file with
    the same base name is written to ``resultDirPath``. Each shape becomes one
    line of the form ``<class_id> x1 y1 x2 y2 ...`` where every x is divided by
    the file's ``imageWidth`` and every y by its ``imageHeight``. Class ids are
    assigned in order of first appearance; the class names are saved to
    ``classes.txt`` in ``resultDirPath``, one per line, so that the zero-based
    line index equals the class id.

    Args:
        jsonfilePath: Directory containing the labelme ``*.json`` files.
        resultDirPath: Directory receiving the ``*.txt`` files and
            ``classes.txt``. It is created when it does not exist yet.

    Raises:
        KeyError: If a JSON file lacks ``shapes``, ``label``, ``points`` or
            the image size fields needed for normalization.
        ZeroDivisionError: If a JSON file reports a zero image width/height.
    """
    # Without this, the first open(..., "w") fails on a fresh output path.
    os.makedirs(resultDirPath, exist_ok=True)

    # label name -> class id; dicts keep insertion order, so iterating the
    # keys later yields the names in id order.
    class_ids = {}

    # The order of glob results depends on the file system; sorting makes the
    # first-appearance class ids reproducible across runs and machines.
    json_files = sorted(glob.glob(osp.join(jsonfilePath, "*.json")))

    for json_file in tqdm(json_files, desc='Processing'):
        with open(json_file, "r", encoding='UTF-8') as f:
            annotation = json.load(f)

        # splitext only strips the trailing extension, whereas
        # str.replace(".json", ".txt") would rewrite every occurrence.
        stem = osp.splitext(osp.basename(json_file))[0]
        txt_file = osp.join(resultDirPath, stem + ".txt")

        lines = []
        for shape in annotation["shapes"]:
            class_id = class_ids.setdefault(shape["label"], len(class_ids))

            width = annotation["imageWidth"]
            height = annotation["imageHeight"]
            # YOLO label coordinates are expected to be normalized to [0, 1];
            # vertices annotated slightly outside the image are clamped onto
            # the border so they stay inside that range.
            coords = [
                f"{min(max(point[0] / width, 0.0), 1.0)} "
                f"{min(max(point[1] / height, 0.0), 1.0)}"
                for point in shape["points"]
            ]
            lines.append(f"{class_id} {' '.join(coords)}\n")

        with open(txt_file, "w", encoding="utf-8") as file_handle:
            file_handle.writelines(lines)

    # Labels are read as UTF-8 above, so write the names back as UTF-8 too
    # instead of relying on the platform's default encoding.
    with open(osp.join(resultDirPath, 'classes.txt'), 'w', encoding='utf-8') as f:
        f.write('\n'.join(class_ids))

if __name__ == "__main__":
    # jsonfilePath: directory holding the labelme *.json files to convert.
    # resultDirPath: directory in which the *.txt files are generated.
    jsonfilePath, resultDirPath = r"E:\241231数据扩充图片\标签", r"D:\TXT"
    json_to_txt(jsonfilePath, resultDirPath)

二、TXT标签验证

验证转化后的标签是否正确，代码如下：

import os
import numpy as np
import cv2

# Lower-case file extensions (with leading dot) that are treated as images.
IMG_FORMATS = ['.jpg', '.png', '.jpeg']
# Drawing colors, one 3-tuple of 0-255 channel values per entry. The entries
# follow the PASCAL VOC colormap sequence (indices 0-21); the final entry is
# (128, 64, 128) in that sequence.
COLORS = [(0, 0, 0), (128, 0, 0), (0, 128, 0), (128, 128, 0), (0, 0, 128), (128, 0, 128), (0, 128, 128),
          (128, 128, 128), (64, 0, 0), (192, 0, 0), (64, 128, 0), (192, 128, 0), (64, 0, 128), (192, 0, 128),
          (64, 128, 128), (192, 128, 128), (0, 64, 0), (128, 64, 0), (0, 192, 0), (128, 192, 0), (0, 64, 128),
          (128, 64, 128)]

def get_files(img_path, label_path):
    """Collect the images in ``img_path`` that have a matching label file.

    An image and a label match when their file names are equal once the
    extension is removed. Images without a label are reported on stdout.

    Args:
        img_path: Directory of images. Only files whose lower-cased extension
            is listed in ``IMG_FORMATS`` are considered.
        label_path: Directory of ``*.txt`` label files.

    Returns:
        A tuple ``(names, ext)``: ``names`` is the sorted list of base names
        (without extension) that have both an image and a label, and ``ext``
        is the lower-cased extension of the first image returned by
        ``os.listdir``.

    Raises:
        AssertionError: If no image or no label file is found.
    """
    img_list = [name for name in os.listdir(img_path)
                if os.path.splitext(name)[-1].lower() in IMG_FORMATS]
    # Slice off only the trailing ".txt"; str.replace would also remove a
    # ".txt" that happens to occur in the middle of a file name.
    label_stems = {name[:-len('.txt')] for name in os.listdir(label_path) if name.endswith('.txt')}

    # Raised explicitly (rather than via `assert`) so the checks still run
    # under `python -O`, while keeping the exception type unchanged.
    if not img_list:
        raise AssertionError(f"在 {img_path} 中未找到图像文件")
    if not label_stems:
        raise AssertionError(f"在 {label_path} 中未找到标签文件")

    img_stems = {name.rsplit('.', 1)[0] for name in img_list}

    img_without_label = img_stems - label_stems
    if img_without_label:
        print(f"没有对应标签的图像: {sorted(img_without_label)}")

    # Sorted so that callers iterating only a prefix of the list see the same
    # images on every run; set iteration order is not stable for strings.
    ok_img = sorted(img_stems & label_stems)
    return ok_img, os.path.splitext(img_list[0])[-1].lower()

def convert_and_draw(x, w, h, img):
    """Draw one normalized polygon label (filled shape plus class id) on ``img``.

    Args:
        x: Sequence ``[class_id, x1, y1, x2, y2, ...]``. The items may be
            strings, as produced by splitting a label line; the coordinates
            are multiplied by ``w`` / ``h`` to obtain pixel positions.
        w: Width used to scale the x coordinates.
        h: Height used to scale the y coordinates.
        img: Image passed to the OpenCV drawing calls.

    Returns:
        The ``img`` object that was passed in.

    Raises:
        ValueError: If ``x`` carries no coordinates or an odd number of them,
            i.e. it cannot be split into (x, y) pairs.
    """
    label, *coords = x
    # Fail with a message that names the problem instead of an opaque
    # reshape / indexing error further down.
    if not coords or len(coords) % 2:
        raise ValueError(
            f"label row must hold x/y pairs after the class id, got {len(coords)} value(s)"
        )

    # Scale the normalized (x, y) pairs to pixels, then truncate to integers.
    points = np.array(coords, dtype=np.float32).reshape(-1, 2) * [w, h]
    points = points.astype(np.int32)

    class_id = int(label)
    # Wrap around so class ids beyond the palette size still get a color.
    color = COLORS[class_id % len(COLORS)]
    cv2.fillPoly(img, [points], color=color)

    # Cast defensively to plain Python ints: some OpenCV builds are reported
    # to reject NumPy integer scalars for point arguments.
    origin = tuple(int(v) for v in points[0])
    cv2.putText(img, text=str(class_id), org=origin, fontFace=cv2.FONT_HERSHEY_COMPLEX,
                fontScale=1, color=color, thickness=3)

    return img

def _draw_and_show(img_file, label_file, if_show):
    """Draw every polygon of ``label_file`` on ``img_file`` and optionally display it."""
    img = cv2.imread(img_file)
    # cv2.imread returns None instead of raising when the file cannot be
    # read; fail here with the offending path rather than on `.shape`.
    if img is None:
        raise OSError(f"Failed to read image: {img_file}")
    h, w = img.shape[:2]

    with open(label_file, 'r') as f:
        # Skip blank lines so they are not handed on as empty label rows.
        labels = [line.split() for line in f if line.strip()]
    for label in labels:
        img = convert_and_draw(label, w, h, img)

    if if_show:
        cv2.namedWindow('Image with label', cv2.WINDOW_NORMAL)
        cv2.imshow('Image with label', img)
        # Block until a key is pressed before continuing.
        cv2.waitKey(0)


def main(img_path, label_path, outfile, if_show, show_num):
    """Overlay YOLO segmentation labels on images to check them visually.

    Args:
        img_path: Either a directory of images or the path of a single image.
        label_path: The directory of ``*.txt`` labels when ``img_path`` is a
            directory, otherwise the path of the single label file.
        outfile: Currently unused; kept so existing callers keep working.
        if_show: When true, each annotated image is shown in a window and the
            function waits for a key press.
        show_num: Directory mode only - stop after this many images.

    Raises:
        OSError: If an image cannot be read.
    """
    if not os.path.isdir(img_path):
        _draw_and_show(img_path, label_path, if_show)
        return

    ok_img, img_ext = get_files(img_path, label_path)

    # get_files reports only the (lower-cased) extension of the first image.
    # Remember the real file name for every stem so directories with mixed
    # or upper-case extensions can still be resolved.
    on_disk = {}
    for file_name in os.listdir(img_path):
        stem, ext = os.path.splitext(file_name)
        if ext.lower() in IMG_FORMATS:
            on_disk.setdefault(stem, file_name)

    for i, img_name in enumerate(ok_img, 1):
        img_file = os.path.join(img_path, img_name + img_ext)
        if not os.path.isfile(img_file):
            img_file = os.path.join(img_path, on_disk.get(img_name, img_name + img_ext))
        _draw_and_show(img_file, os.path.join(label_path, img_name + '.txt'), if_show)
        if i == show_num:
            break

if __name__ == '__main__':
    # Image input: a single image file here; main() also accepts a directory.
    img_path = r"C:\Users\Admin\Desktop\ultralytics-8.2.87\datasets\coco8-seg\images\train/000000000034.jpg"
    # Label input matching img_path: a single *.txt file here.
    label_path = r"C:\Users\Admin\Desktop\ultralytics-8.2.87\datasets\coco8-seg\labels\train/000000000034.txt"
    # outfile: output folder; if_show: whether to display the drawn image;
    # show_num: maximum number of images to display (a key press moves on).
    outfile, if_show, show_num = './', True, 3

    main(img_path=img_path, label_path=label_path, outfile=outfile, if_show=if_show, show_num=show_num)