文章目录
- (1)WiderPerson数据集详情
- <1> 应用项目
- <2> 数据集地址
- <3> 归属单位
- <4> 详细介绍
- <5> 数据下载及格式介绍
- (2)WiderPerson转YOLO格式
- <1> 文件夹结构
- <2> 数据可视化
- <3> YOLO格式标签转化
(1)WiderPerson数据集详情
<1> 应用项目
人体检测
<2> 数据集地址
http://www.cbsr.ia.ac.cn/users/sfzhang/WiderPerson/
<3> 归属单位
生物识别与安全技术研究中心(CBSR)、模式识别国家重点实验室(NLPR)、中国科学院自动化研究所
<4> 详细介绍
WiderPerson数据集是一个野外场景下的行人检测基准数据集,其中的图像选自广泛的场景,不再局限于交通场景。数据集共选取13382幅图像,标注了约40万个带有各种遮挡情况的目标框。其中随机选取8000/1000/4382幅图像分别作为训练、验证和测试子集。与CityPersons和WiderFace数据集类似,官方不公开测试图像的边界框真值标注;用户需要提交最终预测文件,由官方统一进行评估。
<5> 数据下载及格式介绍
Download the WiderPerson dataset through Google Drive or Baidu Drive (uq3u), unzip the downloaded file, get the following files:
# (1)文件目录
• "./Images": 13,382 images of this dataset.
• "./Annotations": 9,000 annotation text files of training and validation subsets.
• "./Evaluation": evaluation codes.
• "./train.txt": file list of training subset.
• "./test.txt": file list of testing subset.
• "./val.txt": file list of validation subset.
• "./ReadMe.txt": file of instruction.
# (2)注释格式
“./Images”文件夹中训练和验证子集的每幅图像(例如000001.jpg)在“./Annotations”文件夹中都有对应的标注文本文件(例如000001.jpg.txt)。标注文件结构的格式如下:
...
< number of annotations in this image = N >
< anno 1 >
< anno 2 >
...
< anno N >
...
其中每行一个对象实例为[class_label,x1,y1,x2,y2],类标签定义为:
...
< class_label =1: pedestrians >
< class_label =2: riders >
< class_label =3: partially-visible persons >
< class_label =4: ignore regions >
< class_label =5: crowd >
...
# (3) 检测输出
每个图像的检测结果应该是一个与图像前缀相同但后缀为“.txt”的文本文件,例如:000001.jpg -> 000001.txt。所有检测文本文件都应放在同一文件夹中进行评估。检测输出文件应遵循如下格式:
...
< number of detections in this image = N >
< det 1 >
< det 2 >
...
< det N >
...
每个检测到的边界框的格式应为“[x1,y1,x2,y2,score]”。
(2)WiderPerson转YOLO格式
<1> 文件夹结构
WiderPerson
├─ WiderPerson
├─ Annotations
├─ 000040.jpg.txt
├─ .......
├─ Evaluation
├─ Images
├─ 000040.jpg
├─ .......
├─ train.txt
├─ val.txt
├─ test.txt
├─ widerperson_visual.py
├─ widerperson2yolo.py
<2> 数据可视化
widerperson_visual.py
# -*- coding: utf-8 -*-
import os
import cv2
if __name__ == '__main__':
    # Visualize WiderPerson annotations: draw green boxes for pedestrians
    # (class 1) and partially-visible persons (class 3) on each train image.
    path = './WiderPerson/train.txt'
    with open(path, 'r') as f:
        img_ids = [x for x in f.read().splitlines()]
    for img_id in img_ids:  # e.g. '000040'
        img_path = './WiderPerson/Images/' + img_id + '.jpg'
        img = cv2.imread(img_path)
        print(img_path)
        # Fix: cv2.imread returns None for a missing/unreadable image;
        # guard to avoid an AttributeError instead of crashing mid-loop.
        if img is None:
            continue
        label_path = img_path.replace('Images', 'Annotations') + '.txt'
        print(label_path)
        with open(label_path) as file:
            # First line holds the number of annotations in this image.
            count = int(file.readline().strip())
            for line in file:
                fields = line.split()  # split once instead of per-field
                if not fields:
                    continue
                cls = int(fields[0])
                print(cls)
                # WiderPerson class labels:
                #   1: pedestrians
                #   2: riders
                #   3: partially-visible persons
                #   4: ignore regions (e.g. people in posters/paintings)
                #   5: crowd (one large box covering a dense group)
                if cls == 1 or cls == 3:
                    xmin = float(fields[1])
                    ymin = float(fields[2])
                    xmax = float(fields[3])
                    ymax = float(fields[4])
                    img = cv2.rectangle(img, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (0, 255, 0), 2)
        cv2.imshow('result', img)
        cv2.waitKey(0)
<3> YOLO格式标签转化
widerperson2yolo.py
# -*- coding: utf-8 -*-
import os
from PIL import Image
import shutil
# coding=utf-8
def check_charset(file_path):
    """Detect the text encoding of *file_path* with chardet.

    Returns the detected encoding name, or 'utf-8' when detection is
    inconclusive (chardet reports None for e.g. empty files).
    """
    import chardet  # local import: only needed during label conversion
    with open(file_path, "rb") as f:
        # Fix: the original sampled only 4 bytes, which is far too little
        # for reliable detection on BOM-less files; sample a larger prefix.
        data = f.read(4096)
    charset = chardet.detect(data)['encoding']
    # Fix: never hand None to open(..., encoding=...) downstream.
    return charset or 'utf-8'
def convert(size, box0, box1, box2, box3):
    """Convert a corner box (x1, y1, x2, y2) in pixels to normalized
    YOLO (center_x, center_y, width, height).

    size: (image_width, image_height) used for normalization.
    """
    dw, dh = 1. / size[0], 1. / size[1]
    cx = (box0 + box2) / 2 * dw
    cy = (box1 + box3) / 2 * dh
    bw = (box2 - box0) * dw
    bh = (box3 - box1) * dh
    return (cx, cy, bw, bh)
def extract_labels_images(outpath_txt, outpath_jpg, ori_data_path, origin_txt_path):
    """Convert WiderPerson annotations to YOLO txt files and move the images.

    For every image id listed in *origin_txt_path*, keep only class 1
    (pedestrians), normalize each box via convert(), write one
    '1 cx cy w h' line per kept box to <outpath_txt>/<id>.txt, and MOVE
    the image into *outpath_jpg* (the source tree is modified).
    """
    with open(origin_txt_path, 'r') as f:
        img_ids = f.read().splitlines()
    for img_id in img_ids:  # e.g. '000040'
        img_path = ori_data_path + '/Images/' + img_id + '.jpg'
        with Image.open(img_path) as im:
            img_size = im.size  # (width, height)
        label_path = img_path.replace('Images', 'Annotations') + '.txt'
        # Fix: os.path.join instead of a hard-coded '\\' separator,
        # which produced broken paths on non-Windows systems.
        outpath = os.path.join(outpath_txt, img_id + '.txt')
        ans_lines = []
        with open(label_path, encoding=check_charset(label_path)) as file:
            # First line holds the number of annotations (not needed here).
            next(file)
            for line in file:
                fields = line.split()  # split once instead of per-field
                if not fields:
                    continue
                cls = int(fields[0])
                # Keep pedestrians only; change to `cls in (1, 3)` to also
                # keep partially-visible persons.
                if cls == 1:
                    xmin, ymin, xmax, ymax = map(float, fields[1:5])
                    bb = convert(img_size, xmin, ymin, xmax, ymax)
                    ans_lines.append('1 ' + ' '.join(str(a) for a in bb) + '\n')
        with open(outpath, 'w') as outfile:
            outfile.writelines(ans_lines)
        # Use shutil.copy instead to keep the original image in place.
        shutil.move(img_path, os.path.join(outpath_jpg, img_id + '.jpg'))
def write_label(otxt_path, ntxt_path):
    """Rewrite every label file in *otxt_path* into *ntxt_path* with class 0.

    WiderPerson class ids start at 1, but YOLO requires ids starting at 0;
    since only one class was extracted, every id is collapsed to '0'.
    Box coordinates are copied through unchanged.
    """
    for root, dirs, files in os.walk(otxt_path):
        for fname in files:
            otxt = os.path.join(otxt_path, fname)
            ntxt = os.path.join(ntxt_path, fname)
            converted = []
            # Fix: context manager closes the input handle (the original
            # opened it with open() and never closed it).
            with open(otxt, 'r', encoding='utf-8') as fin:
                for line in fin:
                    if line == '\n':
                        continue
                    parts = line.split(" ")
                    # To keep multiple classes distinct (1/2/3 -> 0/1/2),
                    # use str(int(parts[0]) - 1) instead of the constant '0'.
                    converted.append('0' + " " + parts[1] + " " + parts[2] + " " + parts[3] + " " + parts[4])
            with open(ntxt, "a") as fout:
                fout.writelines(converted)
def write_train_val_txt(labels_path, txt_path, image_set):
    """Append the YOLO image list <txt_path>/<image_set>.txt.

    One line per label file found in <labels_path>/<image_set>, each of the
    form './images/<image_set>/<stem>.jpg' as YOLO training expects.
    """
    # Fix: os.path.join instead of hard-coded '\\' separators, which made
    # the original fail on Linux/macOS (directory not found).
    set_dir = os.path.join(labels_path, image_set)
    stems = [fname.split(".")[0] for fname in os.listdir(set_dir)]
    # Fix: context manager guarantees the list file is flushed and closed.
    with open(os.path.join(txt_path, '%s.txt' % image_set), 'a') as list_file:
        for stem in stems:
            list_file.write('./images' + '/%s/%s.jpg\n' % (image_set, stem))
if __name__ == '__main__':
    # Driver: convert the WiderPerson train/val subsets into YOLO layout
    # (images/, labels/ plus train.txt / val.txt image lists).
    print('WiderPerson数据集yolo格式文件抽取程序启动:')
    # Root of the original dataset
    ori_data_path = './WiderPerson'
    # Output root for the converted subset
    out_data_path = './WiderPerson/WiderPerson_yolo'
    sets = ['train', 'val']
    for data_set in sets:
        # Step 1: extract the target class annotations and images.
        print('(1)' + data_set + '数据类别抽取中......')
        # Directory for the intermediate (class-extracted) label files
        outpath_txt = out_data_path + '/label/' + data_set
        # Directory the corresponding images are moved into
        outpath_jpg = out_data_path + '/images/' + data_set
        # NOTE(review): makedirs raises FileExistsError on a re-run — this
        # script appears intended for a fresh output directory only; confirm.
        os.makedirs(outpath_txt)
        os.makedirs(outpath_jpg)
        # File list (train.txt / val.txt) of the original dataset
        origin_txt_path = ori_data_path + '/' + data_set + '.txt'
        extract_labels_images(outpath_txt, outpath_jpg, ori_data_path, origin_txt_path)
        # Step 2: rewrite the labels with YOLO class ids (starting from 0).
        print('(2)' + data_set + '_label文件写入中......')
        otxt_path = out_data_path + "/label/" + data_set
        ntxt_path = out_data_path + "/labels/" + data_set
        os.makedirs(ntxt_path)
        write_label(otxt_path, ntxt_path)
        # Step 3: write the train.txt and val.txt image lists.
        print('(3)' + data_set + '.txt写入中......')
        labels_path = out_data_path + "/labels"
        txt_path = out_data_path + "/labels"
        write_train_val_txt(labels_path, txt_path, data_set)
    # Step 4: drop the intermediate ./label tree, keeping only ./labels.
    print('(4)' + '删除多余文件中......')
    shutil.rmtree(out_data_path + "/label")
    print('数据集抽取完成!!!')