DataWhale AI夏令营 2024大运河杯-数据开发应用创新赛
- 数据增强
- 数据收集
- 打标签
# -*- coding: utf-8 -*-
"""
Created on 2023-04-01 9:08
@author: Fan yi ming
Func: 对于目标检测的数据增强[YOLO](特点是数据增强后标签也要更改)
注意: boxes的标签和坐标一个是int,一个是float,存放的时候要注意处理方式。
"""
import torch
from PIL import Image
from PIL import ImageDraw
from PIL import ImageFile
from torchvision import transforms
import numpy as np
import matplotlib.pyplot as plt
import os
import random
class DataAugmentationOnDetection:
    """Augmentations for YOLO-format object-detection samples.

    Geometric methods take a PIL image plus a ``boxes`` tensor whose rows are
    ``[class_id, x_center, y_center, width, height]`` with the four
    coordinates normalised to the image size, and return the transformed
    ``(image, boxes)`` pair.  Photometric methods take an image tensor with
    values in [0, 1] and return the transformed tensor; boxes are unaffected
    by them.
    """

    def __init__(self):
        super().__init__()

    @staticmethod
    def _fit_size(old_size, target_size, allow_upscale=True):
        """Return ``old_size`` scaled so its longest side equals ``target_size``.

        The aspect ratio is kept.  With ``allow_upscale=False`` the scale
        factor is capped at 1 so the result is never larger than the input.
        """
        ratio = min(float(target_size) / side for side in old_size)
        if not allow_upscale:
            ratio = min(ratio, 1)
        return tuple(int(side * ratio) for side in old_size)

    # ------------------------------------------------------
    # The methods below expect a PIL image (the result of Image.open).
    # ------------------------------------------------------
    def resize_keep_ratio(self, image, boxes, target_size):
        """Resize so the longest side becomes ``target_size``, keeping the ratio.

        Args:
            image: PIL image.
            boxes: Tensor of normalised YOLO boxes.
            target_size: Length in pixels of the longest side after resizing.

        Returns:
            ``(resized_image, boxes)``.  The boxes are returned untouched
            because uniform scaling does not change normalised coordinates.
        """
        new_size = DataAugmentationOnDetection._fit_size(image.size[0:2], target_size)
        return image.resize(new_size, Image.BILINEAR), boxes

    def resizeDown_keep_ratio(self, image, boxes, target_size):
        """Like :meth:`resize_keep_ratio`, but only ever shrinks the image."""
        new_size = DataAugmentationOnDetection._fit_size(
            image.size[0:2], target_size, allow_upscale=False
        )
        return image.resize(new_size, Image.BILINEAR), boxes

    def resize(self, img, boxes, size):
        """Resize the image to a ``size`` x ``size`` square, ignoring the ratio.

        Normalised boxes stay valid under independent scaling of each axis,
        so they are returned unchanged.
        """
        return img.resize((size, size), Image.BILINEAR), boxes

    def random_flip_horizon(self, img, boxes, h_rate=1):
        """Flip horizontally with probability ``h_rate``.

        When the flip happens, ``x_center`` of every box is mirrored.
        ``boxes`` is modified in place, so pass a copy if the original must
        be kept.
        """
        if np.random.random() < h_rate:
            img = transforms.RandomHorizontalFlip(p=1)(img)
            if len(boxes) > 0:
                boxes[:, 1] = 1 - boxes[:, 1]
        return img, boxes

    def random_flip_vertical(self, img, boxes, v_rate=1):
        """Flip vertically with probability ``v_rate``.

        When the flip happens, ``y_center`` of every box is mirrored.
        ``boxes`` is modified in place, so pass a copy if the original must
        be kept.
        """
        if np.random.random() < v_rate:
            img = transforms.RandomVerticalFlip(p=1)(img)
            if len(boxes) > 0:
                boxes[:, 2] = 1 - boxes[:, 2]
        return img, boxes

    @staticmethod
    def _center_crop_boxes(boxes, w, h):
        """Re-express YOLO boxes relative to the centred square crop of a w x h image.

        Boxes lying completely outside the crop are dropped, boxes crossing
        its border are clipped to it.  Returns an ``(M, 5)`` tensor
        (``M <= N``) in the same ``[class_id, xc, yc, w, h]`` layout,
        normalised to the crop size.
        """
        size = min(w, h)
        label = boxes[:, 0].reshape([-1, 1])
        x_, y_, w_, h_ = boxes[:, 1], boxes[:, 2], boxes[:, 3], boxes[:, 4]

        # Normalised centre/size -> absolute pixel corners; ratios are awkward
        # to crop directly.
        x1 = (w * x_ - 0.5 * w * w_).reshape([-1, 1])
        y1 = (h * y_ - 0.5 * h * h_).reshape([-1, 1])
        x2 = (w * x_ + 0.5 * w * w_).reshape([-1, 1])
        y2 = (h * y_ + 0.5 * h * h_).reshape([-1, 1])
        boxes_xyxy = torch.cat([x1, y1, x2, y2], dim=1)

        # Only the longer axis loses pixels, so only that axis is shifted.
        if w > h:
            boxes_xyxy[:, [0, 2]] = boxes_xyxy[:, [0, 2]] - (w - h) / 2
        else:
            boxes_xyxy[:, [1, 3]] = boxes_xyxy[:, [1, 3]] - (h - w) / 2

        # A box is discarded when both of its edges on one axis fall on the
        # same side of the crop window.
        left, top, right, bottom = boxes_xyxy.unbind(dim=1)
        x_outside = ((left < 0) & (right < 0)) | ((left > size) & (right > size))
        y_outside = ((top < 0) & (bottom < 0)) | ((top > size) & (bottom > size))
        keep = ~(x_outside | y_outside)

        clipped = boxes_xyxy[keep].clamp(min=0, max=size).reshape([-1, 4])
        label = label[keep].reshape([-1, 1])

        # Back to normalised YOLO centre/size, now relative to the square crop.
        x1, y1, x2, y2 = clipped[:, 0], clipped[:, 1], clipped[:, 2], clipped[:, 3]
        xc = ((x1 + x2) / (2 * size)).reshape([-1, 1])
        yc = ((y1 + y2) / (2 * size)).reshape([-1, 1])
        wc = ((x2 - x1) / size).reshape([-1, 1])
        hc = ((y2 - y1) / size).reshape([-1, 1])
        return torch.cat([label, xc, yc, wc, hc], dim=1)

    def center_crop(self, img, boxes, target_size=None):
        """Crop the largest centred square and optionally resize it.

        Args:
            img: PIL image.
            boxes: ``(N, 5)`` tensor of normalised YOLO boxes; may be empty.
            target_size: If given (and non-zero), the crop is resized to
                ``target_size`` x ``target_size``.

        Returns:
            ``(cropped_image, boxes)`` with the boxes re-normalised to the
            crop; boxes entirely outside the crop are removed.
        """
        w, h = img.size
        size = min(w, h)
        if len(boxes) > 0:
            boxes = DataAugmentationOnDetection._center_crop_boxes(boxes, w, h)

        img = transforms.CenterCrop(size)(img)
        if target_size:
            img = img.resize((target_size, target_size), Image.BILINEAR)
        return img, boxes

    # ------------------------------------------------------
    # The methods below expect ``img`` to be a float tensor in [0, 1].
    # They modify the tensor they are given, so pass a clone to keep the
    # original.
    # ------------------------------------------------------
    def random_bright(self, img, u=120, p=1):
        """With probability ``p`` add a random offset in ``[-u, u] / 255``."""
        if np.random.random() < p:
            alpha = np.random.uniform(-u, u) / 255
            img += alpha
            img = img.clamp(min=0.0, max=1.0)
        return img

    def random_contrast(self, img, lower=0.5, upper=1.5, p=1):
        """With probability ``p`` multiply by a random factor in ``[lower, upper]``."""
        if np.random.random() < p:
            alpha = np.random.uniform(lower, upper)
            img *= alpha
            img = img.clamp(min=0, max=1.0)
        return img

    def random_saturation(self, img, lower=0.5, upper=1.5, p=1):
        """With probability ``p`` scale channel 1 by a random factor.

        Intended for three-channel colour images: only the middle channel
        (index 1) is multiplied and then clamped to [0, 1].
        """
        if np.random.random() < p:
            alpha = np.random.uniform(lower, upper)
            img[1] = img[1] * alpha
            img[1] = img[1].clamp(min=0, max=1.0)
        return img

    def add_gasuss_noise(self, img, mean=0, std=0.1):
        """Add Gaussian noise with the given ``mean``/``std`` and clamp to [0, 1]."""
        noise = torch.normal(mean, std, img.shape)
        img += noise
        img = img.clamp(min=0, max=1.0)
        return img

    def add_salt_noise(self, img):
        """Set a random subset of elements to 1.0 ("salt").

        The per-call threshold is drawn from [0.7, 0.9); elements whose
        uniform random draw exceeds it are overwritten.
        """
        noise = torch.rand(img.shape)
        alpha = np.random.random() / 5 + 0.7
        img[noise > alpha] = 1.0
        return img

    def add_pepper_noise(self, img):
        """Set a random subset of elements to 0 ("pepper").

        The per-call threshold is drawn from [0.7, 0.9); elements whose
        uniform random draw exceeds it are overwritten.
        """
        noise = torch.rand(img.shape)
        alpha = np.random.random() / 5 + 0.7
        img[noise > alpha] = 0
        return img
def plot_pics(img, boxes):
    """Show an image with its YOLO boxes outlined.

    Args:
        img: PIL image (result of ``Image.open``).
        boxes: Tensor whose rows are ``[class_id, xc, yc, w, h]`` with
            coordinates normalised to the image size.
    """
    # Draw the picture first so the rectangles land on top of it in pixel
    # coordinates.
    plt.imshow(img)
    label_colors = [(213, 110, 89)]
    edge_color = [c / 255 for c in label_colors[0]]
    w, h = img.size
    for i in range(boxes.shape[0]):
        xc, yc, wc, hc = boxes[i, 1:]
        # Normalised centre/size -> top-left corner and size in pixels.
        x = w * xc - 0.5 * w * wc
        y = h * yc - 0.5 * h * hc
        box_w, box_h = w * wc, h * hc
        plt.gca().add_patch(plt.Rectangle(xy=(x, y), width=box_w, height=box_h,
                                          edgecolor=edge_color,
                                          fill=False, linewidth=2))
    plt.show()
def get_image_list(image_path):
    """Return the names of all files found under ``image_path``.

    The directory tree is walked recursively; only bare file names (without
    their directory part) are returned, in the order ``os.walk`` yields
    them.
    """
    files_list = []
    for _root, _sub_dirs, files in os.walk(image_path):
        files_list.extend(files)
    return files_list
def get_label_file(label_path, image_name):
    """Load the YOLO label rows that belong to ``image_name``.

    The label file is ``<label_path>/<image stem>.txt``.  Each non-blank
    line is split on whitespace and converted to a list of floats.

    Returns:
        A list of rows (lists of floats); empty when the label file is
        missing or has no content.
    """
    # splitext copes with extensions of any length (".jpeg", ".png", ...).
    stem = os.path.splitext(image_name)[0]
    fname = os.path.join(label_path, stem + ".txt")
    if not os.path.exists(fname) or os.path.getsize(fname) == 0:
        return []

    rows = []
    with open(fname, 'r', encoding='utf-8') as infile:
        for line in infile:
            fields = line.split()
            # Blank lines would otherwise yield ragged empty rows.
            if fields:
                rows.append([float(value) for value in fields])
    return rows
def save_Yolo(img, boxes, save_path, prefix, image_name):
    """Write an augmented image and its labels under ``save_path``.

    The image goes to ``<save_path>/images/<prefix><image_name>`` and the
    labels to ``<save_path>/labels/<prefix><image stem>.txt``.  Each label
    line is the class id as an integer followed by the remaining values as
    floats, separated by single spaces.

    Args:
        img: Image object exposing ``save(path)`` (a PIL image).
        boxes: Iterable of label rows; may be empty, in which case an empty
            label file is written.
        save_path: Output root; created if necessary.
        prefix: String prepended to both output file names.
        image_name: Original image file name, including its extension.
    """
    image_dir = os.path.join(save_path, "images")
    label_dir = os.path.join(save_path, "labels")
    # exist_ok keeps a partially created layout from raising.
    os.makedirs(image_dir, exist_ok=True)
    os.makedirs(label_dir, exist_ok=True)

    stem = os.path.splitext(image_name)[0]
    try:
        img.save(os.path.join(image_dir, prefix + image_name))
        with open(os.path.join(label_dir, prefix + stem + ".txt"), 'w',
                  encoding="utf-8") as f:
            for data in boxes:
                # Column 0 is the integer class id, the rest are floats.
                fields = [str(int(a)) if i == 0 else str(float(a))
                          for i, a in enumerate(data)]
                f.write(" ".join(fields) + '\n')
    except Exception as exc:
        # Best effort: one bad sample must not abort a whole batch run.
        print("ERROR: ", image_name, " is bad.", exc)
def runAugumentation(image_path, label_path, save_path):
    """Generate augmented copies of every image found under ``image_path``.

    For each image the matching label file is read from ``label_path`` and
    nine variants are written to ``save_path`` through ``save_Yolo``.  The
    file-name prefix identifies the variant:

    * ``fh_`` horizontal flip, ``fv_`` vertical flip, ``cc_`` centre crop
      resized to 1024 (boxes are adjusted);
    * ``rb_`` brightness, ``rc_`` contrast, ``rs_`` saturation, ``gn_``
      Gaussian noise, ``sn_`` salt noise, ``pn_`` pepper noise (boxes are
      unchanged).

    Drop any of the steps below to produce fewer variants.
    """
    image_list = get_image_list(image_path)
    # The augmenter and the converters hold no per-image state, so they are
    # built once.
    DAD = DataAugmentationOnDetection()
    to_tensor = transforms.ToTensor()
    to_image = transforms.ToPILImage()

    for image_name in image_list:
        print("dealing: " + image_name)
        img = Image.open(os.path.join(image_path, image_name))
        boxes = torch.tensor(get_label_file(label_path, image_name))

        # --- Geometric transforms on the PIL image ---------------------
        # The flips edit the boxes in place, hence the clone per variant.
        # An optional down-scaling step could be added here with
        # DAD.resizeDown_keep_ratio(img, boxes, 1024).
        t_img, t_boxes = DAD.random_flip_horizon(img, boxes.clone())
        save_Yolo(t_img, t_boxes, save_path, prefix="fh_", image_name=image_name)

        t_img, t_boxes = DAD.random_flip_vertical(img, boxes.clone())
        save_Yolo(t_img, t_boxes, save_path, prefix="fv_", image_name=image_name)

        t_img, t_boxes = DAD.center_crop(img, boxes.clone(), 1024)
        save_Yolo(t_img, t_boxes, save_path, prefix="cc_", image_name=image_name)

        # --- Photometric transforms on the tensor image ----------------
        # These change pixel values only, so the original boxes are saved.
        img = to_tensor(img)
        tensor_ops = (
            ("rb_", DAD.random_bright),
            ("rc_", DAD.random_contrast),
            ("rs_", DAD.random_saturation),
            ("gn_", DAD.add_gasuss_noise),
            ("sn_", DAD.add_salt_noise),
            ("pn_", DAD.add_pepper_noise),
        )
        for prefix, op in tensor_ops:
            # Each op modifies its input, so every variant starts from a
            # fresh clone of the source tensor.
            t_img = op(img.clone())
            save_Yolo(to_image(t_img), boxes, save_path, prefix=prefix, image_name=image_name)

        print("end: " + image_name)
if __name__ == '__main__':
    # Source image and label folders.
    image_path = "./yolo-dataset-lwb/val"
    label_path = "./yolo-dataset-lwb/val_txt"
    # Output root; it does not need to exist beforehand.
    save_path = "./yolo-dataset-lwb/save_val"
    # Run the augmentation over the whole folder.
    runAugumentation(image_path, label_path, save_path)
| 参数 | 范围 | 功能 |
| --- | --- | --- |
| hsv_h | 默认0.015,可调范围是0.0~1.0 | 调整图像的色调,引入颜色可变性。 |
| hsv_s | 默认0.7,可调范围是0.0~1.0 | 调整图像的饱和度。 |
| translate | 默认0.1,可调范围是0.0~1.0 | 平移一小部分图像。 |
| scale | 默认0.5,可调范围是大于等于0.0都可以 | 缩放图像。 |
| fliplr | 默认为0.5,可调范围是0.0~1.0 | 以指定的概率将图像从左向右翻转,左右镜像。 |
| mosaic | 默认为1.0,可调范围是0.0~1.0 | 将四个训练图像组合为一个。 |
| auto_augment | 默认为randaugment,可调范围是(randaugment、autoaugment和augmix) | 面向分类任务,自动应用预定义的增强策略。 |
| erasing | 默认为0.4,可调范围是0.0~0.9 | 在分类训练过程中随机擦除图像的一部分。 |
| crop_fraction | 默认为1.0,可调范围是0.1~1.0 | 将分类图像裁剪到其大小的一小部分。 |
| 参数 | 范围 | 功能 |
| --- | --- | --- |
| degrees | 范围是-180~+180 | 在指定的度数范围内随机旋转图像。 |
| shear | 范围是-180~+180 | 以指定的角度剪切图像。 |
| perspective | 范围是0.0~0.001 | 将随机透视变换应用于图像。 |
| flipud | 范围是0.0~1.0 | 以指定的概率将图像倒置。 |
| bgr | 范围是0.0~1.0 | 以指定的概率将图像通道从RGB翻转到BGR。 |
| mixup | 范围是0.0~1.0 | 混合两个图像及其标签,创建合成图像。 |
| copy_paste | 范围是0.0~1.0 | 将对象从一个图像中复制并粘贴到另一个图像上。 |
# Training snippet: fine-tune a YOLO model on the competition dataset.
import os
# Expose only GPU 0 to this process. This is set before ultralytics is
# imported — presumably so it is in place before any CUDA initialisation
# (TODO confirm).
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import warnings
from ultralytics import YOLO
# NOTE(review): the model path is an empty string — it looks like the
# weights/config file name was lost. Supply the intended model file before
# running.
model = YOLO("")
# Train for 2 epochs on the dataset described by yolo-dataset/yolo.yaml with
# the copy_paste and mixup augmentations switched on.
# NOTE(review): imgsz=1080 may be adjusted by the library if it must be a
# multiple of the model stride — confirm against the ultralytics docs.
results = model.train(data="yolo-dataset/yolo.yaml", epochs=2, imgsz=1080, batch=16, copy_paste=0.2, mixup=0.5)
conda create -n label python=3.8
conda activate label
pip install labelme
# #
import cv2
import os
import json
import shutil
import numpy as np
from pathlib import Path
from glob import glob
# Class id -> label name for the four annotated violation categories.
# (A single-class project would use e.g. {0: 'clothing'} instead.)
id2cls = {
    0: '非机动车违停',
    1: '机动车违停',
    2: '垃圾桶满溢',
    3: '违法经营',
}
# Reverse lookup, label name -> class id.
cls2id = {label: class_id for class_id, label in id2cls.items()}
def cv_imread(filePath):
    """Read an image as a colour array, supporting non-ASCII file paths.

    The raw bytes are loaded with ``np.fromfile`` and decoded with
    ``cv2.imdecode`` so that paths containing e.g. Chinese characters work.
    """
    raw_bytes = np.fromfile(filePath, dtype=np.uint8)
    cv_img = cv2.imdecode(raw_bytes, flags=cv2.IMREAD_COLOR)
    return cv_img
def labelme2yolo_single(img_path, label_file, class_map=None):
    """Convert one labelme JSON annotation file to YOLO label rows.

    Args:
        img_path: Image directory. Kept for interface compatibility; the
            image file name is taken from the annotation itself.
        label_file: Path to the labelme ``.json`` file.
        class_map: Optional mapping from label name to class id. Defaults
            to the module-level ``cls2id`` table.

    Returns:
        ``(labels, image_name)`` where ``labels`` is a NumPy array with one
        ``[class_id, x_center, y_center, width, height]`` row per shape
        (coordinates normalised by the image size recorded in the file) and
        ``image_name`` is the base name of the annotated image.

    Raises:
        KeyError: If a shape carries a label missing from the class map.
    """
    if class_map is None:
        class_map = cls2id

    with open(label_file, "r", encoding="utf-8") as fh:
        anno = json.load(fh)

    w0, h0 = anno['imageWidth'], anno['imageHeight']
    # Concatenating img_path and imagePath without a separator used to leak
    # the directory name into the result ("val" + "a.jpg" -> "vala.jpg").
    image_path = os.path.basename(anno['imagePath'])

    labels = []
    for s in anno['shapes']:
        pts = s['points']
        if len(pts) < 2:
            # A single point cannot span a box.
            continue
        # Enclosing box of all points: identical to the two-corner formula
        # for rectangles and also valid for polygons.
        xs = [p[0] for p in pts]
        ys = [p[1] for p in pts]
        x_min, x_max = min(xs), max(xs)
        y_min, y_max = min(ys), max(ys)
        x = (x_min + x_max) / 2 / w0
        y = (y_min + y_max) / 2 / h0
        w = (x_max - x_min) / w0
        h = (y_max - y_min) / h0
        labels.append([class_map[s['label']], x, y, w, h])
    return np.array(labels), image_path
def labelme2yolo(img_path, labelme_label_dir, save_dir='res/'):
    """Convert every labelme ``.json`` file in a folder to a YOLO ``.txt`` file.

    Args:
        img_path: Image directory, forwarded to ``labelme2yolo_single``.
        labelme_label_dir: Folder containing the labelme JSON annotations.
        save_dir: Folder that receives the YOLO label files; created when it
            does not exist.

    One ``<json stem>.txt`` is written per annotation file that contains at
    least one shape; annotation files without shapes produce no output.
    """
    labelme_label_dir = str(Path(labelme_label_dir))
    yolo_label_dir = str(Path(save_dir))
    # np.savetxt cannot create the output folder itself.
    os.makedirs(yolo_label_dir, exist_ok=True)

    json_files = sorted(glob(os.path.join(labelme_label_dir, '*.json')))
    for ijf, jf in enumerate(json_files):
        print(ijf + 1, '/', len(json_files), jf)
        filename = os.path.basename(jf).rsplit('.', 1)[0]
        labels, _image_name = labelme2yolo_single(img_path, jf)
        if len(labels) > 0:
            # Class id as an integer, coordinates with 16 decimals.
            np.savetxt(os.path.join(yolo_label_dir, filename + '.txt'), labels,
                       fmt='%d %0.16f %0.16f %0.16f %0.16f')
if __name__ == '__main__':
    img_path = './yolo-dataset-lwb/val'  # folder with the dataset images
    json_dir = './yolo-dataset-lwb/val_json'  # folder with the labelme JSON labels
    save_dir = './yolo-dataset-lwb/val_txt'  # folder that receives the YOLO txt labels
    labelme2yolo(img_path, json_dir, save_dir)