torchvision中的transforms
是transforms.py工具箱,含有ToTensor、Resize等工具类
用于将特定格式的图片转换为想要的图片的结果,即用于图片变换
用法
在transforms中选择一个类创建对象,然后像调用函数一样直接调用该对象(即触发其__call__方法)对图片进行处理
能够选择的类
列表
[
"Compose",
"ToTensor", # 转化为tensor类型
"PILToTensor",
"ConvertImageDtype",
"ToPILImage", # tensor或ndarray转换为PIL image
"Normalize", # 归一化(用均值和标准差归一化tensor类型的image)
"Resize",
"CenterCrop",
"Pad",
"Lambda",
"RandomApply",
"RandomChoice",
"RandomOrder",
"RandomCrop",
"RandomHorizontalFlip",
"RandomVerticalFlip",
"RandomResizedCrop",
"FiveCrop",
"TenCrop",
"LinearTransformation",
"ColorJitter",
"RandomRotation",
"RandomAffine",
"Grayscale",
"RandomGrayscale",
"RandomPerspective",
"RandomErasing",
"GaussianBlur",
"InterpolationMode",
"RandomInvert",
"RandomPosterize",
"RandomSolarize",
"RandomAdjustSharpness",
"RandomAutocontrast",
"RandomEqualize",
"ElasticTransform",
]
使用
- NT
- 关注输入和输出类型
- 多看官方文档(可以进源码去看)
from PIL import Image
from torch.utils.tensorboard import SummaryWriter
from torchvision import transforms
# Demo: apply several torchvision transforms to one image and log each result
# to TensorBoard (event files are written under ./logs).
IMG_PATH = './dataset2/train/ants_image/5650366_e22b7e1065.jpg'

# Both the writer and the image file are opened as context managers so they
# are closed even if one of the transforms raises part-way through.
with SummaryWriter('logs') as writer, Image.open(IMG_PATH) as img:
    print(img)

    # ToTensor: convert the PIL image into a tensor.
    trans_totensor = transforms.ToTensor()
    img_tensor = trans_totensor(img)
    writer.add_image("tensor_img", img_tensor)

    # Normalize: output = (input - mean) / std, applied per channel.
    # One mean and one std must be supplied for every channel.
    print(img_tensor[0][0][0])
    trans_norm = transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
    img_norm = trans_norm(img_tensor)
    print(img_norm[0][0][0])
    writer.add_image("norm_img", img_norm)

    # Resize, usage 1: an (h, w) pair resizes to exactly that size.
    print(img.size)
    trans_resize = transforms.Resize((512, 512))
    # img (PIL) -> Resize -> img_resize (PIL)
    img_resize = trans_resize(img)
    # img_resize (PIL) -> ToTensor -> img_resize (tensor)
    img_resize = trans_totensor(img_resize)
    print(img_resize)

    # Resize, usage 2 (via Compose): a single int scales the image while
    # keeping its aspect ratio.
    trans_resize_2 = transforms.Resize(512)
    # Compose applies its transforms in list order (left to right), so each
    # transform's output type must match the next transform's input type:
    # PIL -> Resize -> PIL -> ToTensor -> tensor
    trans_compose = transforms.Compose([trans_resize_2, trans_totensor])
    img_resize_2 = trans_compose(img)
    writer.add_image("Resize", img_resize_2, 1)

    # RandomCrop: an int requests a square crop, a tuple requests a rectangle.
    trans_random = transforms.RandomCrop(256)
    trans_compose_2 = transforms.Compose([trans_random, trans_totensor])
    # Log ten independent random crops as steps 0..9 of the same tag.
    for i in range(10):
        img_crop = trans_compose_2(img)
        writer.add_image("RandomCrop", img_crop, i)
进入调试,就会发现img_tensor中有一些反向传播的钩子:
以及使用的设备:
可以理解为tensor类型包含了神经网络训练所需要的一些参数
其他
类中的__call__
方法:当对象后面直接加括号调用(如 p("jerry"))时,会自动执行该方法(区别于普通方法需要用“.”调用)
class Person:
    """Minimal example contrasting ``__call__`` with an ordinary method."""

    def __call__(self, name: str) -> None:
        """Print ``name``; runs when the instance itself is called, e.g. ``p(name)``."""
        print("__call__:", name)

    def hello(self, name: str) -> None:
        """Print a greeting; must be invoked explicitly as ``p.hello(name)``."""
        print("hello", name)


p = Person()
# Calling the instance with parentheses dispatches to Person.__call__.
p("jerry")
# An ordinary method has to be reached through attribute access.
p.hello('tom')