XML和JSON格式转换成txt

news2026/2/12 8:25:12

XML如下这种：

转换代码

import os
import xml.etree.ElementTree as ET

# Directory containing the XML annotation files (change to your own path)
input_dir = r'C:\121\Annotations'

# Output directory for the generated txt files (a folder you create yourself)
out_dir = r'C:\121\txt'

# Class names found in the XML files. get_class() appends to this list, and
# readxml() uses a name's index in it as the class id.
class_list = []


# Collect the XML files found under a directory
def file_name(input_dir):
    """Return the extension-less names of all ``.xml`` files under *input_dir*.

    The tree is walked recursively with ``os.walk``. Only files whose
    extension is exactly ``.xml`` are kept, and only the bare file name
    (no directory part, no extension) is returned for each of them.
    """
    stems = []
    for _root, _dirs, files in os.walk(input_dir):
        # Split each name once, then keep the stem when the suffix matches.
        split_names = (os.path.splitext(entry) for entry in files)
        stems.extend(stem for stem, suffix in split_names if suffix == '.xml')
    return stems


# Collect every class name used in the annotations
def get_class(filelist):
    """Record every distinct object class name found in the given XML files.

    For each name in *filelist* the file ``<input_dir>/<name>.xml`` is parsed
    and the text of every ``<object>`` element's ``<name>`` child is appended
    to the module-level ``class_list`` the first time it is seen, so a class's
    index in that list follows first-appearance order.

    Args:
        filelist: Iterable of XML file names without the ``.xml`` extension.

    Returns:
        None. ``class_list`` is extended in place.
    """
    for stem in filelist:
        # os.path.join instead of a hard-coded "\\" so the path is also valid
        # on non-Windows systems.
        xml_path = os.path.join(input_dir, stem + ".xml")
        # The context manager closes the file even when parsing raises.
        with open(xml_path, encoding='UTF-8') as in_file:
            root = ET.parse(in_file).getroot()
        for obj in root.iter('object'):
            cls = obj.find('name').text
            if cls not in class_list:
                class_list.append(cls)


def ConverCoordinate(imgshape, bbox):
    """Turn a pixel bounding box into normalized centre/size values.

    Args:
        imgshape: ``(width, height)`` of the image.
        bbox: ``(xmin, xmax, ymin, ymax)`` of the box, in pixels.

    Returns:
        ``(x, y, w, h)``: the box centre and the box size, with x/w scaled by
        ``1 / width`` and y/h scaled by ``1 / height``.
    """
    left, right, top, bottom = bbox
    img_w, img_h = imgshape[0], imgshape[1]

    # Reciprocals of the image dimensions, used as scale factors below.
    scale_x = 1. / img_w
    scale_y = 1. / img_h

    # Box centre and extent, still in pixels.
    center_x = (left + right) / 2.0
    center_y = (top + bottom) / 2.0
    box_w = right - left
    box_h = bottom - top

    # Normalize.
    return center_x * scale_x, center_y * scale_y, box_w * scale_x, box_h * scale_y


def readxml(i):
    """Convert one XML annotation file into a YOLO-style txt label file.

    Reads ``<input_dir>/<i>.xml``, takes the image width and height from its
    ``<size>`` element and writes one line per top-level ``<object>`` to
    ``<out_dir>/<i>.txt`` in the form ``class_id x y w h``, where the four box
    values are the normalized results of ConverCoordinate().

    Args:
        i: XML file name without the ``.xml`` extension.

    Raises:
        ValueError: If an object's class name is not in ``class_list``
            (raised by ``list.index``).
    """
    # os.path.join instead of a hard-coded "\\" keeps the path valid on
    # non-Windows systems as well.
    xml_path = os.path.join(input_dir, i + ".xml")
    with open(xml_path, encoding='UTF-8') as xml_file:
        root = ET.parse(xml_file).getroot()

    # Image size, needed to normalize the pixel coordinates.
    size = root.find('size')
    imgshape = (int(size.find('width').text), int(size.find('height').text))

    lines = []
    for obj in root.findall('object'):
        # The class id is the position of the class name in class_list.
        obj_id = class_list.index(obj.find('name').text)

        # Box corners in the order ConverCoordinate expects.
        bbox = obj.find('bndbox')
        bbox_coor = tuple(
            float(bbox.find(tag).text)
            for tag in ('xmin', 'xmax', 'ymin', 'ymax')
        )

        x, y, w, h = ConverCoordinate(imgshape, bbox_coor)
        lines.append('{} {} {} {} {}\n'.format(obj_id, x, y, w, h))

    # Open with 'w', not 'a': in append mode every re-run of the conversion
    # added a second copy of each label line to the existing txt file.
    with open(os.path.join(out_dir, i + ".txt"), 'w') as f:
        f.write(''.join(lines))


# Collect the names of all XML files under input_dir
filelist = file_name(input_dir)

# Build class_list from the annotations
get_class(filelist)

# Show the discovered classes
print(class_list)

# Make sure the output folder exists so the writes below cannot fail on a
# missing directory; an already existing folder is left untouched.
os.makedirs(out_dir, exist_ok=True)

# Convert every XML file to a txt label file
for i in filelist:
    readxml(i)

# Write classes.txt (one class name per line, in class-id order) into out_dir.
# Opened with 'w', not 'a', so re-running the script does not append a second
# copy of the class list; os.path.join replaces the hard-coded "\\" separator.
classresult = ''.join(name + "\n" for name in class_list)
with open(os.path.join(out_dir, "classes.txt"), 'w') as f:
    f.write(classresult)

JSON格式一：

[{"image": "3591.jpg", "annotations": [{"label": "boat", "coordinates": {"x": 163.9615384615385, "y": 76.0384615384616, "width": 146.0, "height": 23.0}}, {"label": "boat", "coordinates": {"x": 247.4615384615385, "y": 38.538461538461604, "width": 291.0, "height": 52.0}}, {"label": "boat", "coordinates": {"x": 1756.9615384615386, "y": 32.538461538461604, "width": 136.0, "height": 56.0}}]}]

对应的转换代码如下：

import os
import json

# Folder that holds the JSON annotation files; the .txt results are written
# into this same folder
folder_path = "C:/labels/json/"

# Names of all entries in the folder that end with ".json"
json_files = list(filter(lambda entry: entry.endswith(".json"), os.listdir(folder_path)))

for json_file in json_files:
    json_path = os.path.join(folder_path, json_file)

    # Parse the whole JSON document
    with open(json_path, "r") as file:
        data = json.load(file)

    # The annotations are stored on the first element of the document
    annotations = data[0]['annotations']

    # One "0 x y width height" line per annotation; the class id is always
    # written as 0, regardless of the annotation's label
    rows = []
    for annotation in annotations:
        coordinates = annotation['coordinates']
        rows.append(
            f"0 {coordinates['x']} {coordinates['y']} "
            f"{coordinates['width']} {coordinates['height']}\n"
        )
    annotations_str = "".join(rows)

    # Same base name as the JSON file, with a ".txt" extension
    base_name, _ext = os.path.splitext(json_file)
    txt_file = base_name + ".txt"
    txt_path = os.path.join(folder_path, txt_file)

    # Save the label lines
    with open(txt_path, "w") as file:
        file.write(annotations_str)

JSON格式二（例如 labelme 导出的标注文件，包含 imageWidth、imageHeight 和 shapes 字段，每个 shape 含有 label、shape_type 和 points）：

转换代码

import json
import os

name2id = {'boat': 0}  # maps each label name to the integer class id written to the txt file


def convert(img_size, box):
    """Convert a pixel-space corner box to normalized ``(x, y, w, h)``.

    Args:
        img_size: ``(width, height)`` of the image in pixels.
        box: ``(x1, y1, x2, y2)``, two opposite corners of the rectangle in
            pixels. The corners may be given in either order.

    Returns:
        Tuple ``(x_center, y_center, width, height)``; the x values are
        divided by the image width and the y values by the image height.

    Raises:
        ZeroDivisionError: If either image dimension is 0.
    """
    dw = 1. / (img_size[0])
    dh = 1. / (img_size[1])

    # Order the corners first: a rectangle recorded from its bottom-right
    # corner to its top-left one would otherwise get a negative width/height.
    x_min, x_max = min(box[0], box[2]), max(box[0], box[2])
    y_min, y_max = min(box[1], box[3]), max(box[1], box[3])

    # No "- 1" offset on the centre: the previous version subtracted one pixel,
    # which shifted every box and could yield a negative centre for a box at
    # the image origin.
    x = (x_min + x_max) / 2.0
    y = (y_min + y_max) / 2.0
    w = x_max - x_min
    h = y_max - y_min

    return (x * dw, y * dh, w * dw, h * dh)


def decode_json(json_floder_path, json_name, txt_folder_path='C:/Users/labels/txt/'):
    """Convert the rectangle shapes of one JSON annotation file to a txt file.

    The JSON file is expected to provide ``imageWidth``, ``imageHeight`` and a
    ``shapes`` list whose items carry ``label``, ``shape_type`` and ``points``.
    Every shape whose ``shape_type`` is ``'rectangle'`` becomes one line
    ``class_id x y w h`` in the output file; other shapes are skipped.

    Args:
        json_floder_path: Folder that contains the JSON file.
        json_name: File name of the JSON file inside that folder.
        txt_folder_path: Folder the ``.txt`` file is written to. Defaults to
            the path that was previously hard-coded.

    Raises:
        KeyError: If a rectangle's label is not a key of ``name2id``.
    """
    json_path = os.path.join(json_floder_path, json_name)
    # NOTE(review): the gb2312 / errors='ignore' decoding is kept unchanged;
    # it silently drops bytes that are not valid gb2312, so non-ASCII labels
    # may be altered. Confirm the real encoding of the files.
    with open(json_path, 'r', encoding='gb2312', errors='ignore') as json_file:
        data = json.load(json_file)

    img_w = data['imageWidth']
    img_h = data['imageHeight']

    lines = []
    for shape in data['shapes']:
        if shape['shape_type'] != 'rectangle':
            continue
        label_name = shape['label']
        points = shape['points']
        # The two corner points of the rectangle, truncated to whole pixels.
        bb = (
            int(points[0][0]),
            int(points[0][1]),
            int(points[1][0]),
            int(points[1][1]),
        )
        bbox = convert((img_w, img_h), bb)
        lines.append(str(name2id[label_name]) + " " + " ".join([str(a) for a in bbox]) + '\n')

    # splitext instead of slicing off five characters, so the output name does
    # not depend on the extension being exactly ".json".
    txt_name = os.path.join(txt_folder_path, os.path.splitext(json_name)[0] + '.txt')
    # The output is opened only after the JSON was read successfully, and both
    # files are closed by their context managers.
    with open(txt_name, 'w') as txt_file:
        txt_file.write(''.join(lines))


if __name__ == "__main__":

    # Absolute path of the folder that contains the JSON files
    json_floder_path = 'C:/Users/labels/json/'
    json_names = os.listdir(json_floder_path)
    for json_name in json_names:
        # Skip everything that is not a JSON file (e.g. images stored next to
        # the annotations); passing such an entry on would fail inside
        # json.load and abort the whole run.
        if not json_name.lower().endswith('.json'):
            continue
        decode_json(json_floder_path, json_name)

本文来自互联网用户投稿，该文观点仅代表作者本人，不代表本站立场。本站仅提供信息存储空间服务，不拥有所有权，不承担相关法律责任。如若转载，请注明出处：http://www.coloradmin.cn/o/564206.html

如若内容造成侵权/违法违规/事实不符，请联系多彩编程网进行投诉反馈，一经查实，立即删除！