本文介绍PA100k数据集。该数据集从GitHub上PaddleClas工程提供的路径下载,下面对该数据集进行介绍:
01 - 图像信息
- 训练集 - 80000张图像
- 验证集 - 10000张图像
- 测试集 - 10000张图像
02 - 标签信息
· 属性1:有无帽子 - [0]
· 属性2:有无眼镜 - [1]
· 属性3:上衣类型 - [2-7]
· 属性4:下衣类型 - [8-13]
· 属性5:是否穿靴子 - [14]
· 属性6:包的类型 - [15-17]
· 属性7:是否在身前手持物品 - [18]
· 属性8:年龄 - [19-21]
· 属性9: 是否为女性 - [22]
· 属性10:人体朝向 - [23-25]
0 = Hat - 帽子:0无1有
1 = Glasses - 眼镜:0无1有
2 = ShortSleeve - 短袖
3 = LongSleeve - 长袖
4 = UpperStride - 有条纹
5 = UpperLogo - 印有logo/图案
6 = UpperPlaid - 格子衫
7 = UpperSplice - 撞色/拼接衣服(多种颜色)
8 = LowerStripe - 有条纹
9 = LowerPattern - 印有图像
10 = LongCoat - 长款大衣
11 = Trousers - 长裤
12 = Shorts - 短裤
13 = Skirt&Dress - 裙子/连衣裙
14 = boots - 靴子
15 = HandBag - 手提包
16 = ShoulderBag - 单肩包
17 = Backpack - 背包
18 = HoldObjectsInFront - 手持物品
19 = AgeOver60 - 大于60
20 = Age18-60 - 18~60岁
21 = AgeLess18 - 小于18
22 = Female - 0:男性; 1:女性
23 = Front - 人体朝前
24 = Side - 人体朝侧
25 = Back - 人体朝后
针对val图像,根据属性对图像分类:
实现代码如下:
import os
import tqdm
import shutil
def mkdir(path):
    """Create the directory ``path`` if it does not already exist.

    Missing parent directories are created as well, and an already
    existing directory is left untouched.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    # exist_ok=True replaces the separate exists() check, which could race
    # with another process creating the same directory in between.
    os.makedirs(path, exist_ok=True)
def find_indices(lst, value):
    """Return the indices of all elements of ``lst`` that equal ``value``.

    Args:
        lst: Sequence to scan.
        value: Value compared against each element with ``==``.

    Returns:
        A list of matching positions in ascending order; empty when
        nothing matches.
    """
    matches = []
    for position, item in enumerate(lst):
        if item == value:
            matches.append(position)
    return matches
def pa100k(list_path="./val_list.txt", src_root="./", save_root="./属性分析/"):
    """Copy the images of an annotation list into one folder per attribute.

    Every non-empty line of ``list_path`` is read as an image path, a tab,
    and a comma-separated list of flags. For each flag equal to ``'1'`` the
    image is copied into ``<save_root>/<index>_<label name>/``, so an image
    with several active attributes is copied several times.

    Args:
        list_path: Annotation list file to read.
        src_root: Directory the image paths in the list are relative to.
        save_root: Directory that receives the per-attribute folders.

    Raises:
        FileNotFoundError: If ``list_path`` or a listed image is missing.
    """
    # 01 - attribute names; position i names the i-th flag of each line.
    labels = ["Hat", "Glasses", "ShortSleeve", "LongSleeve", "UpperStride", "UpperLogo", "UpperPlaid", "UpperSplice",
              "LowerStripe", "LowerPattern", "LongCoat", "Trousers", "Shorts", "Skirt", "boots", "HandBag", "ShoulderBag",
              "Backpack", "HoldObjectsInFront", "AgeOver60", "Age18-60", "AgeLess18", "Female", "Front", "Side", "Back"]

    # The context manager closes the file even if reading fails.
    with open(list_path, 'r', encoding='utf-8') as fr:
        lines = fr.readlines()

    # 02 - create one output folder per attribute.
    mkdir(save_root)
    for index, attr in enumerate(labels):
        mkdir(os.path.join(save_root, str(index) + '_' + attr))

    # 03 - copy every image into the folder of each attribute it has.
    for line in tqdm.tqdm(lines):
        fields = line.strip().split('\t')
        if len(fields) < 2:
            # Blank or malformed line (e.g. trailing newline at end of file).
            continue
        name = fields[0]
        # Strip each flag so stray spaces or '\r' do not hide a '1'.
        flags = [flag.strip() for flag in fields[1].split(',')]
        for index in find_indices(flags, value='1'):
            if index >= len(labels):
                # More flags than known attributes: no folder to copy into.
                continue
            save_folder = str(index) + '_' + labels[index]
            shutil.copyfile(
                os.path.join(src_root, name),
                os.path.join(save_root, save_folder, os.path.basename(name)),
            )
if __name__ == '__main__':
    # Run the attribute sorting only when executed as a script, not on import.
    pa100k()