在训练 YOLO 的过程中,难免会涉及标注数据的格式转换。经过几次修改和迭代,现把最终的转换代码分享给大家。
先分享 xml 转 txt 部分的代码,py_convert_xml2txt.py:
# -*- coding:utf-8 -*-
import os
import shutil
import cv2
import numpy as np
import xml.etree.ElementTree as ET
# 80 COCO-style class names (with the VOC spellings 'motorbike', 'aeroplane',
# 'sofa', ...); presumably indexed by class id -- not referenced by the code
# in this script.
wordname_coco = [
    'person', 'bicycle', 'car', 'motorbike', 'aeroplane', 'bus', 'train',
    'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella',
    'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
    'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard',
    'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork',
    'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
    'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair',
    'sofa', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
    'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
    'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors',
    'teddy bear', 'hair drier', 'toothbrush',
]

# The 20 Pascal VOC class names in alphabetical order.  The original list
# contained 'sheep' twice, which shifted 'train' and 'tvmonitor' by one when
# the list is used with ``.index()`` to obtain a class id.
wordname_voc = [
    'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat',
    'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person',
    'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor',
]

# Classes used by this project; the list index is the class id stored in the
# YOLO .txt labels.
wordname_zkrc = ['person', 'car', 'cat', 'dog']
#person car truck cat dog
def convert(size, box):
    """Turn a pixel corner box into a normalised centre/size box.

    Args:
        size: ``(width, height)`` of the image in pixels.
        box: ``(xmin, xmax, ymin, ymax)`` in pixels.

    Returns:
        ``(x, y, w, h)`` where ``x``/``w`` are scaled by ``1 / width`` and
        ``y``/``h`` by ``1 / height``.  The centre has 1 subtracted from it
        before scaling (presumably to compensate for 1-based VOC pixel
        coordinates -- confirm against the annotation tool).
    """
    scale_x = 1. / (size[0])
    scale_y = 1. / (size[1])

    x_min, x_max, y_min, y_max = box[0], box[1], box[2], box[3]

    center_x = (x_min + x_max) / 2.0 - 1
    center_y = (y_min + y_max) / 2.0 - 1
    box_w = x_max - x_min
    box_h = y_max - y_min

    return (center_x * scale_x, center_y * scale_y, box_w * scale_x, box_h * scale_y)
def convert_xml2txt_voc():
    """Convert VOC-style ``.xml`` annotations into YOLO ``.txt`` labels.

    Every ``.xml`` file in ``xml_dir`` is paired with the image that has the
    same stem (``.jpg``, ``.jpeg``, ``.bmp`` or ``.png``, tried in that
    order).  Pairs whose image is missing or unreadable are skipped.  For each
    remaining pair a label file ``<title><n>_.txt`` is written to
    ``out_temp_txt_dir`` with one ``class_id x y w h`` line per
    person/car/cat/dog object; objects of any other class are ignored.  When
    at least one object was kept, the image is re-encoded to
    ``out_images_dir`` as ``<title><n>_.jpg`` and the label file is copied to
    ``out_txt_dir``.

    All paths are hard-coded below; the function takes no arguments and
    returns nothing.
    """
    xml_dir = "./data/LR_person_car_dataset/train/"
    imagedir = "./data/LR_person_car_dataset/train/"
    out_txt_dir = './InfraredData/txt/'
    out_temp_txt_dir = './InfraredData/temp_Txt/'
    out_images_dir = './InfraredData/Images/'
    title = "20240823_4_"

    # Annotation class name -> YOLO class id.
    class_ids = {'person': 0, 'car': 1, 'cat': 2, 'dog': 3}
    image_exts = ('.jpg', '.jpeg', '.bmp', '.png')

    # open() and cv2.imwrite() do not create missing directories.
    for out_dir in (out_txt_dir, out_temp_txt_dir, out_images_dir):
        os.makedirs(out_dir, exist_ok=True)

    count = 0
    # Sorted so the running number in the output names is reproducible;
    # os.listdir() returns entries in arbitrary order.
    for file_name in sorted(os.listdir(xml_dir)):
        stem, ext = os.path.splitext(file_name)
        if ext != '.xml':
            continue
        print(file_name)

        image = None
        for image_ext in image_exts:
            candidate = os.path.join(imagedir, stem + image_ext)
            if os.path.isfile(candidate):
                image = cv2.imread(candidate)
                break
        if image is None:
            continue

        count += 1
        out_stem = title + str(count) + "_"
        out_file_name = os.path.join(out_temp_txt_dir, out_stem + '.txt')
        out_jpg_name = os.path.join(out_images_dir, out_stem + '.jpg')

        root = ET.parse(os.path.join(xml_dir, file_name)).getroot()
        try:
            size = root.find('size')
            w = int(size.find('width').text)
            h = int(size.find('height').text)
        except (AttributeError, TypeError, ValueError):
            w = h = 0
        if w <= 0 or h <= 0:
            # Missing or zero <size>: use the decoded image instead of
            # dividing by zero in convert().
            h, w = image.shape[:2]

        num = 0
        with open(out_file_name, 'w') as out_file:
            for obj in root.iter('object'):
                cls_id = class_ids.get(obj.findtext('name'))
                if cls_id is None:
                    continue
                num += 1
                xmlbox = obj.find('bndbox')
                b = (
                    float(xmlbox.find('xmin').text),
                    float(xmlbox.find('xmax').text),
                    float(xmlbox.find('ymin').text),
                    float(xmlbox.find('ymax').text),
                )
                bb = convert((w, h), b)
                out_file.write(str(cls_id) + " " + " ".join(str(a) for a in bb) + '\n')

        # Only images with at least one kept object are published.
        if num > 0:
            cv2.imwrite(out_jpg_name, image)
            shutil.copy(out_file_name, out_txt_dir)
def check_txt_ok():
    """Draw the YOLO labels back onto their images for a visual check.

    For every ``.jpg`` in ``image_dir`` the label file with the same stem in
    ``txt_dir`` is read (if it exists); each ``class_id x y w h`` line is
    scaled to pixels, drawn as a red rectangle with the class name above it,
    and the annotated image is written to ``test_dir`` under the same name.
    Unreadable images and blank or short label lines are skipped instead of
    aborting the run.

    All paths are hard-coded below; the function takes no arguments and
    returns nothing.
    """
    test_dir = './InfraredData/test/'
    image_dir = "./InfraredData/Images/"
    txt_dir = './InfraredData/txt/'
    font = cv2.FONT_HERSHEY_SIMPLEX

    # cv2.imwrite() fails silently when the target directory is missing.
    os.makedirs(test_dir, exist_ok=True)

    for file_name in sorted(os.listdir(image_dir)):
        stem, ext = os.path.splitext(file_name)
        if ext != '.jpg':
            continue

        image = cv2.imread(os.path.join(image_dir, file_name))
        if image is None:
            # cv2.imread() returns None rather than raising on a bad file.
            print('cannot read image:', file_name)
            continue
        shape = image.shape
        print(shape)
        img_h, img_w = shape[0], shape[1]

        label_path = os.path.join(txt_dir, stem + '.txt')
        if os.path.isfile(label_path):
            with open(label_path, 'r') as label_file:
                for line in label_file:
                    fields = line.split()
                    if not fields:
                        continue
                    if len(fields) < 5:
                        print('malformed label line in', label_path, ':', line.strip())
                        continue
                    numbers = [float(x) for x in fields]

                    cls_id = int(numbers[0])
                    center_x = int(numbers[1] * img_w)
                    center_y = int(numbers[2] * img_h)
                    half_w = int(numbers[3] * img_w / 2)
                    half_h = int(numbers[4] * img_h / 2)

                    # Fall back to the raw id so an unexpected class is
                    # visible in the picture instead of raising IndexError
                    # (or silently wrapping around for negative ids).
                    if 0 <= cls_id < len(wordname_zkrc):
                        label = wordname_zkrc[cls_id]
                    else:
                        label = str(cls_id)

                    left, top = center_x - half_w, center_y - half_h
                    right, bottom = center_x + half_w, center_y + half_h
                    cv2.putText(image, label, (left, top - 10), font, 1.2, (255, 255, 255), 2)
                    cv2.rectangle(image, (left, top), (right, bottom), (0, 0, 255), 2)

        # NOTE(review): the original indentation was lost; the save is assumed
        # to happen once per image after all boxes are drawn -- confirm.
        cv2.imwrite(os.path.join(test_dir, stem + '.jpg'), image)
if __name__ == "__main__":
    # Script entry point: uncomment the step to run.  Only the visual label
    # check is enabled.
    # convert_xml2txt_coco()
    # convert_xml2txt_voc()
    check_txt_ok()
再分享 txt 转 xml 部分的代码,py_convert_txt2xml.py:
import os
import shutil
import xml.etree.ElementTree as ET
from xml.etree.ElementTree import Element, SubElement
from PIL import Image
import cv2
# Project class names; a label's class id is its position in this list.
wordname_zkrc = [
    'person',
    'car',
    'cat',
    'dog',
]
# Alternative, coarser label set (disabled):
# wordname_zkrc = ['person', 'vehicle', 'animal', 'object']
class Xml_make(object):
    """Build a VOC-style ``<annotation>`` tree and write it as tab-indented XML."""

    def __init__(self):
        super().__init__()

    def __indent(self, elem, level=0):
        """Add newline/tab whitespace to ``elem`` in place so the written XML is indented.

        ``level`` is the nesting depth of ``elem``; existing non-blank text
        and tails are left untouched.
        """
        pad = "\n" + "\t" * level
        if not len(elem):
            # Leaf: only its tail needs whitespace, and never for the root.
            if level and not (elem.tail and elem.tail.strip()):
                elem.tail = pad
            return

        if not (elem.text and elem.text.strip()):
            elem.text = pad + "\t"
        if not (elem.tail and elem.tail.strip()):
            elem.tail = pad

        children = list(elem)
        for child in children:
            self.__indent(child, level + 1)

        # The last child is followed by the parent's closing tag, so its tail
        # drops back to the parent's indentation.
        last_child = children[-1]
        if not (last_child.tail and last_child.tail.strip()):
            last_child.tail = pad

    def _imageinfo(self, list_top):
        """Create the ``<annotation>`` root with the per-image header elements.

        ``list_top`` layout: 0 xml save path, 1 folder, 2 filename, 3 path,
        4 checked (not written), 5 width, 6 height, 7 depth.

        Returns:
            ``(tree, annotation_root)``.
        """
        annotation_root = ET.Element('annotation')
        annotation_root.set('verified', 'no')
        tree = ET.ElementTree(annotation_root)

        for tag, index in (('folder', 1), ('filename', 2), ('path', 3)):
            SubElement(annotation_root, tag).text = list_top[index]

        source = SubElement(annotation_root, 'source')
        SubElement(source, 'database').text = 'Unknown'

        size = SubElement(annotation_root, 'size')
        for tag, index in (('width', 5), ('height', 6), ('depth', 7)):
            SubElement(size, tag).text = str(list_top[index])

        SubElement(annotation_root, 'segmented').text = '0'
        return tree, annotation_root

    def _bndbox(self, annotation_root, list_bndbox):
        """Append one ``<object>`` per 9-item record of the flat ``list_bndbox``.

        Record layout: name, flag (not written), pose, truncated, difficult,
        xmin, ymin, xmax, ymax.  Returns ``annotation_root``.
        """
        text_fields = (('pose', 2), ('truncated', 3), ('difficult', 4))
        corner_fields = (('xmin', 5), ('ymin', 6), ('xmax', 7), ('ymax', 8))

        for base in range(0, len(list_bndbox), 9):
            obj = ET.Element('object')
            SubElement(obj, 'name').text = list_bndbox[base]
            for tag, offset in text_fields:
                SubElement(obj, tag).text = list_bndbox[base + offset]
            box = SubElement(obj, 'bndbox')
            for tag, offset in corner_fields:
                SubElement(box, tag).text = str(list_bndbox[base + offset])
            annotation_root.append(obj)
        return annotation_root

    def txt_to_xml(self, list_top, list_bndbox):
        """Build the annotation from the two flat lists and write it to ``list_top[0]``."""
        tree, root = self._imageinfo(list_top)
        root = self._bndbox(root, list_bndbox)
        self.__indent(root)
        tree.write(list_top[0], encoding='utf-8', xml_declaration=True)
def txt_2_xml(source_path, xml_save_dir, jpg_save_dir, txt_dir):
    """Convert YOLO ``.txt`` labels into VOC-style ``.xml`` annotation files.

    Walks ``source_path`` recursively.  For every ``.jpg`` it reads
    ``<txt_dir>/<stem>.txt`` (lines of ``class_id x_center y_center width
    height``, normalised to the image size), converts each box to integer
    pixel corners clamped to the image, and writes
    ``<xml_save_dir>/<stem>.xml`` through ``Xml_make``.  Boxes whose width or
    height is 4 px or less are dropped.  Images without a label file, blank
    or short lines, and class ids outside ``wordname_zkrc`` are reported and
    skipped instead of aborting the run.

    Args:
        source_path: directory tree containing the ``.jpg`` images.
        xml_save_dir: directory receiving the ``.xml`` files; created if missing.
        jpg_save_dir: unused; kept so existing callers keep working.
        txt_dir: flat directory holding the YOLO ``.txt`` label files.

    Returns:
        The number of ``.xml`` files written.
    """
    flag = 'rectangle'
    pose = 'Unspecified'
    truncated = '0'
    difficult = '0'
    checked = 'NO'
    depth = '3'

    if xml_save_dir:
        os.makedirs(xml_save_dir, exist_ok=True)

    converted = 0
    for folder_path, _dir_names, file_names in os.walk(source_path):
        for file_name in file_names:
            # Use the split stem: str.replace() would also rewrite a '.jpg'
            # that occurs in the middle of the file name.
            stem, file_suffix = os.path.splitext(file_name)
            if file_suffix != '.jpg':
                continue

            path = os.path.join(folder_path, file_name)
            xml_save_path = os.path.join(xml_save_dir, stem + '.xml')
            txt_path = os.path.join(txt_dir, stem + '.txt')
            if not os.path.isfile(txt_path):
                print('no label file, skipped:', path)
                continue

            # Only the dimensions are needed; close the image right away.
            with Image.open(path) as im:
                im_w, im_h = im.size[0], im.size[1]

            list_top = [xml_save_path, folder_path, file_name, path, checked,
                        str(im_w), str(im_h), depth]
            list_bndbox = []

            with open(txt_path, 'r') as label_file:
                for line in label_file:
                    info = line.split()
                    if not info:
                        continue
                    if len(info) < 5:
                        print('malformed label line in', txt_path, ':', line.strip())
                        continue

                    class_id = int(info[0])
                    if not 0 <= class_id < len(wordname_zkrc):
                        # The original fell through to wordname_zkrc[4], which
                        # does not exist and raised IndexError.
                        print('unknown class id', class_id, 'in', txt_path)
                        continue
                    name = wordname_zkrc[class_id]

                    x_cen = float(info[1]) * im_w
                    y_cen = float(info[2]) * im_h
                    w = float(info[3]) * im_w
                    h = float(info[4]) * im_h
                    xmin = int(x_cen - w / 2)
                    ymin = int(y_cen - h / 2)
                    xmax = int(x_cen + w / 2)
                    ymax = int(y_cen + h / 2)

                    # Report boxes that leave the image *before* clamping;
                    # after clamping this condition can never be true.
                    if xmin < 0 or xmax > im_w - 1 or ymin < 0 or ymax > im_h - 1:
                        print(xml_save_path)

                    xmin = max(xmin, 0)
                    ymin = max(ymin, 0)
                    xmax = min(xmax, im_w - 1)
                    ymax = min(ymax, im_h - 1)

                    if w > 4 and h > 4:
                        list_bndbox.extend([name, flag, pose, truncated, difficult,
                                            str(xmin), str(ymin), str(xmax), str(ymax)])

            Xml_make().txt_to_xml(list_top, list_bndbox)
            converted += 1
    return converted
if __name__ == "__main__":
    # Script entry point: convert every labelled image under ./images_all/.
    images_path = "./images_all/"    # directory holding the images
    txt_path = "./images_all/"       # directory holding the yolov3 .txt labels (same folder as the images)
    out_xml_path = "./4_classData/"  # destination for the generated .xml files
    out_jpg_path = "./4_classData/"  # destination for the .jpg files
    txt_2_xml(
        source_path=images_path,
        xml_save_dir=out_xml_path,
        jpg_save_dir=out_jpg_path,
        txt_dir=txt_path,
    )