使用ppyoloe训练voc数据集(自制的)详细教程
一、数据集准备工作:
VOC数据集的格式:
通过labelimg标注后的数据集如图所示
分别存放原图与xml标注文件
二、在PaddleDetection(ppdetection)工程目录下找到dataset文件夹
其中有许多的参考数据的格式样例
在dataset文件夹下,新建数据集文件夹(自己命名aaaa)
将制作好的数据集复制进来,格式如下:
三、在pcb文件夹下新建脚本,使用脚本生成待训练的文件列表
import argparse
import random
import re
from pathlib import Path
from matplotlib import pyplot as plt
# Names of the dataset sub-directories and of the generated split list files.
# They are joined onto the dataset root (or its 'paddle' sub-directory) by the
# functions below.
annotations_name = "Annotations"  # directory holding the *.xml annotation files
jpegimages_name = "JPEGImages"  # directory holding the *.jpg images
trainval_name = "trainval.txt"  # list file for the train/val split
# NOTE: the identifier says "text", but the value is the *test* split list file.
text_file_name = "test.txt"
val_file_name = "val.txt"  # list file for the validation split
def write_file(datas, file):
    """Write one "<image path> <annotation path>" line per sample to *file*.

    Args:
        datas: Iterable of sample stems (file names without extension).
        file: ``pathlib.Path`` of the list file to create or overwrite.

    Each line pairs ``<jpegimages_name>/<stem>.jpg`` with
    ``<annotations_name>/<stem>.xml``. Lines are newline-separated and no
    trailing newline is written. A confirmation is printed when done.
    """
    lines = []
    for stem in datas:
        # as_posix() keeps forward slashes even when the script runs on
        # Windows, so the generated list is also usable on Linux/macOS.
        image = Path(jpegimages_name, f'{stem}.jpg').as_posix()
        annotation = Path(annotations_name, f'{stem}.xml').as_posix()
        lines.append(f'{image} {annotation}')
    with file.open('w') as f:
        f.write("\n".join(lines))
    print(file, "success")
def make_paddle_voc(dataset):
    """Shuffle the annotated samples and write trainval/test/val list files.

    The dataset root must contain the ``annotations_name`` directory (``*.xml``)
    and the ``jpegimages_name`` directory (``*.jpg``). The list files are
    written into ``<dataset>/paddle`` via ``write_file``.

    Args:
        dataset: Path (str or ``pathlib.Path``) of the dataset root.

    Raises:
        AssertionError: If *dataset* is not a directory, or an annotation has
            no ``.jpg`` image with the same stem.
        RuntimeError: If either required sub-directory is missing.
    """
    dataset = Path(dataset)
    # Raised explicitly instead of via ``assert`` so the check still runs under
    # ``python -O``; the exception type is kept as AssertionError.
    if not dataset.is_dir():
        raise AssertionError("Error dataset path")
    annotations_path = dataset / annotations_name
    images_path = dataset / jpegimages_name
    if not (annotations_path.is_dir() and images_path.is_dir()):
        print("Please use the following structure:")
        print(f"{dataset.name}")
        print(f'\t{annotations_name}\n\t\t|--00001.xml\n\t\t|--00002.xml')
        print(f'\t{jpegimages_name}\n\t\t|--00001.jpg\n\t\t|--00002.jpg')
        raise RuntimeError(f'Error {jpegimages_name} or {annotations_name} dir')

    paddle_path = dataset / 'paddle'
    paddle_path.mkdir(exist_ok=True, parents=True)
    trainval_path = paddle_path / trainval_name
    test_path = paddle_path / text_file_name
    val_path = paddle_path / val_file_name

    # A set gives O(1) membership tests while pairing annotations with images.
    image_names = {image.name for image in images_path.glob('*.jpg')}
    annotations_list = []
    for annotation in annotations_path.glob('*.xml'):
        if annotation.with_suffix('.jpg').name not in image_names:
            raise AssertionError(f"{annotation.name} has no image file")
        annotations_list.append(annotation.stem)
    random.shuffle(annotations_list)

    # Split ratio is trainval:test:val = 7:2:1; edit the two cut points below
    # to change it. Cut points are computed proportionally from the total so
    # that small datasets, or sizes that are not a multiple of 10, are not
    # distorted by rounding a per-tenth chunk size (which could be 0).
    total = len(annotations_list)
    trainval_end = total * 7 // 10
    test_end = total * 9 // 10
    trainval = annotations_list[:trainval_end]
    test = annotations_list[trainval_end:test_end]
    val = annotations_list[test_end:]

    write_file(trainval, trainval_path)
    write_file(test, test_path)
    write_file(val, val_path)
def check_labels(dataset):
    """Count the labels in the annotations, write the label list, plot a chart.

    Every ``<name>...</name>`` occurrence in each ``*.xml`` file under the
    ``annotations_name`` directory is counted. The sorted distinct names are
    written, one per line, to ``<dataset>/label_list.txt``. When at least one
    name was found, a bar chart of the counts is saved to
    ``<dataset>/类别分布.jpg``. The number of classes is printed at the end.

    Args:
        dataset: Path (str or ``pathlib.Path``) of the dataset root.
    """
    dataset = Path(dataset)
    annotations_path = dataset / annotations_name
    name_pattern = re.compile(r'<name>(.+?)</name>')  # compiled once, reused per file

    counts = {}
    for annotation_path in annotations_path.glob('*.xml'):
        # Read explicitly as UTF-8 (the encoding labelImg writes VOC XML in)
        # rather than the platform default, which can fail or garble
        # non-ASCII text on non-UTF-8 locales.
        content = annotation_path.read_text(encoding='utf-8')
        for name in name_pattern.findall(content):
            counts[name] = counts.get(name, 0) + 1
    class_list = dict(sorted(counts.items()))

    label_list = dataset / 'label_list.txt'
    with label_list.open('w') as f:
        f.write('\n'.join(class_list))

    if class_list:
        fig, ax = plt.subplots()
        # Put each count just above where its bar will be drawn.
        for name, count in class_list.items():
            ax.text(name, count + 1, count, ha='center', va='bottom')
        plt.bar(*zip(*class_list.items()))
        plt.xticks(rotation=270)
        plt.title('Detection category distribution')
        plt.xlabel('Class')
        plt.ylabel('Number')
        plt.tight_layout()
        plt.savefig(f"{dataset / '类别分布.jpg'}", format="jpg")
        plt.close(fig)  # release the figure once it has been saved
        print(f"Save as {dataset / '类别分布.jpg'}")
    else:
        # With no labels, plt.bar() would be called with no arguments and fail.
        print("No <name> tags found in annotations, skip distribution chart")
    print(f"共{len(class_list)}类")
if __name__ == '__main__':
    # Command-line entry point: build the split list files, then summarise the
    # labels found in the annotations.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '--dataset',
        required=True,
        help='input dataset path, necessary parameter',
    )
    dataset_dir = cli.parse_args().dataset
    # NOTE: the original script carried a disabled check_dataset(...) call
    # here; no such function is visible in this file, so it stays disabled.
    make_paddle_voc(dataset_dir)
    check_labels(dataset_dir)
四、修改代码中的配置文件