数据集Chest X-ray PD Dataset 数据集介绍 - 知乎
https://zhuanlan.zhihu.com/p/661311561
CPU版本
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
import pandas as pd
import os
from PIL import Image
from sklearn.model_selection import train_test_split
from time import time
# Record the start timestamp; the elapsed wall-clock time is printed at the end of the script.
start_time = time()
# Dataset definition
class ChestXRayDataset(Dataset):
    """Image dataset described by a metadata CSV file.

    Column 0 of the CSV is used as the image path (relative to ``root_dir``)
    and column 1 as the textual class label.
    """

    def __init__(self, csv_path, root_dir, transform=None):
        """Read the metadata table and remember where the images live.

        Args:
            csv_path: Path (or file-like object) handed to ``pd.read_csv``.
            root_dir: Directory that the image paths in the CSV are joined to.
            transform: Optional callable applied to each loaded PIL image.
        """
        self.data_info = pd.read_csv(csv_path)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the metadata table."""
        return len(self.data_info)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``.

        The image is opened and converted to RGB. The label is encoded as
        0 for ``'covid'``, 1 for ``'normal'`` and 2 for any other value.
        """
        image_path = os.path.join(self.root_dir, self.data_info.iloc[idx, 0])
        image = Image.open(image_path).convert('RGB')

        class_name = self.data_info.iloc[idx, 1]
        # Every label other than the two named classes falls into class 2.
        label = {'covid': 0, 'normal': 1}.get(class_name, 2)

        if not self.transform:
            return image, label
        return self.transform(image), label
# Data preprocessing: resize to 224x224 (the size the CNN's first linear layer
# is dimensioned for), convert to a tensor, then normalise each channel.
# The mean/std values are the ImageNet statistics commonly used with torchvision.
data_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Load the data
root_dir = r'E:\NiuCode\LianXi\data\Chest X-ray\DataSet'
csv_path = r'E:\NiuCode\LianXi\data\Chest X-ray\metadata.csv'
dataset = ChestXRayDataset(csv_path, root_dir, transform=data_transform)

# Split sample *indices* rather than the dataset object itself.
# Passing the Dataset straight to train_test_split makes scikit-learn index it
# element by element, which decodes and transforms every image up front and
# keeps all resulting tensors in memory. Splitting indices with the same
# random_state selects the same samples, while Subset keeps loading lazy.
train_indices, test_indices = train_test_split(
    list(range(len(dataset))), test_size=0.2, random_state=42
)
train_data = torch.utils.data.Subset(dataset, train_indices)
test_data = torch.utils.data.Subset(dataset, test_indices)

# Build the data loaders (only the training set is shuffled)
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)
print("==========")
# CNN model definition
class CNNModel(nn.Module):
    """Small two-stage convolutional classifier with three output classes.

    Each stage is conv(3x3, padding=1) -> ReLU -> 2x2 max-pool, so a
    224x224 RGB input becomes a 32x56x56 feature map before the two fully
    connected layers.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(2, 2)
        # 224 / 2 / 2 = 56 spatial positions per side after the two pools.
        self.fc1 = nn.Linear(32 * 56 * 56, 128)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(128, 3)

    def forward(self, x):
        """Return raw class logits of shape ``(batch, 3)``.

        Args:
            x: Image batch of shape ``(batch, 3, 224, 224)``.

        Raises:
            RuntimeError: If the spatial size is not 224x224, because the
                flattened features no longer match ``fc1``.
        """
        out = self.pool1(self.relu1(self.conv1(x)))
        out = self.pool2(self.relu2(self.conv2(out)))
        # Flatten per sample. The previous ``view(-1, 32 * 56 * 56)`` let a
        # wrongly sized input spill into the batch dimension (e.g. one
        # 448x448 image produced four output rows) instead of failing.
        out = torch.flatten(out, start_dim=1)
        out = self.relu3(self.fc1(out))
        out = self.fc2(out)
        return out
model = CNNModel()

# Loss function and optimiser
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model
num_epochs = 10
for epoch in range(num_epochs):
    running_loss = 0.0
    for images, labels in train_loader:
        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # Report the mean per-batch loss of this epoch.
    print(f'Epoch {epoch + 1}, Loss: {running_loss / len(train_loader)}')

# Evaluate the model
softmax = nn.Softmax(dim=1)  # softmax over the class dimension
correct = 0
total = 0
all_probabilities = []  # per-sample class probabilities for the whole test set
with torch.no_grad():
    for images, labels in test_loader:
        outputs = model(images)
        # Turn the logits into a probability distribution and keep it.
        all_probabilities.extend(softmax(outputs).cpu().numpy())
        # The predicted class is the index of the largest logit.
        predicted = torch.max(outputs.data, 1)[1]
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print(f'Accuracy of the model on the test set: {100 * correct / total}%')

# Print the probabilities of the first few samples
print("Probabilities for the first few samples:")
for sample_no, sample_probs in enumerate(all_probabilities[:5], start=1):
    print(f"Sample {sample_no}: {sample_probs}")

end_time = time()
print("消耗时间={}".format(end_time - start_time))
GPU版本
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
import pandas as pd
import os
from PIL import Image
from sklearn.model_selection import train_test_split
from time import time
# Record the start timestamp; the elapsed wall-clock time is printed at the end of the script.
start_time = time()
# Dataset definition
class ChestXRayDataset(Dataset):
    """Dataset that pairs images on disk with labels from a metadata CSV.

    The first CSV column is read as the image path relative to ``root_dir``;
    the second column is read as the class name.
    """

    def __init__(self, csv_path, root_dir, transform=None):
        """Load the CSV at ``csv_path`` and store ``root_dir`` / ``transform``."""
        self.transform = transform
        self.root_dir = root_dir
        self.data_info = pd.read_csv(csv_path)

    def __len__(self):
        """Number of samples, i.e. the number of CSV rows."""
        return len(self.data_info)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return it as ``(image, label)``.

        ``label`` is 0 for ``'covid'``, 1 for ``'normal'`` and 2 otherwise.
        When a transform was supplied it is applied to the RGB image.
        """
        relative_path = self.data_info.iloc[idx, 0]
        picture = Image.open(os.path.join(self.root_dir, relative_path)).convert('RGB')

        raw_label = self.data_info.iloc[idx, 1]
        # The position in this tuple is the class index; anything not listed is class 2.
        named_classes = ('covid', 'normal')
        label = named_classes.index(raw_label) if raw_label in named_classes else 2

        image = self.transform(picture) if self.transform else picture
        return image, label
# Data preprocessing: resize to 224x224 (the size the CNN's first linear layer
# is dimensioned for), convert to a tensor, then normalise each channel.
# The mean/std values are the ImageNet statistics commonly used with torchvision.
data_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Load the data
root_dir = r'E:\NiuCode\LianXi\data\Chest X-ray\DataSet'
csv_path = r'E:\NiuCode\LianXi\data\Chest X-ray\metadata.csv'
dataset = ChestXRayDataset(csv_path, root_dir, transform=data_transform)

# Split sample *indices* rather than the dataset object itself.
# Passing the Dataset straight to train_test_split makes scikit-learn index it
# element by element, which decodes and transforms every image up front and
# keeps all resulting tensors in memory. Splitting indices with the same
# random_state selects the same samples, while Subset keeps loading lazy.
train_indices, test_indices = train_test_split(
    list(range(len(dataset))), test_size=0.2, random_state=42
)
train_data = torch.utils.data.Subset(dataset, train_indices)
test_data = torch.utils.data.Subset(dataset, test_indices)

# Build the data loaders (only the training set is shuffled)
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)
print("==========")
# CNN model definition
class CNNModel(nn.Module):
    """Small two-stage convolutional classifier with three output classes.

    Each stage is conv(3x3, padding=1) -> ReLU -> 2x2 max-pool, so a
    224x224 RGB input becomes a 32x56x56 feature map before the two fully
    connected layers.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(2, 2)
        # 224 / 2 / 2 = 56 spatial positions per side after the two pools.
        self.fc1 = nn.Linear(32 * 56 * 56, 128)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(128, 3)

    def forward(self, x):
        """Return raw class logits of shape ``(batch, 3)``.

        Args:
            x: Image batch of shape ``(batch, 3, 224, 224)``.

        Raises:
            RuntimeError: If the spatial size is not 224x224, because the
                flattened features no longer match ``fc1``.
        """
        out = self.pool1(self.relu1(self.conv1(x)))
        out = self.pool2(self.relu2(self.conv2(out)))
        # Flatten per sample. The previous ``view(-1, 32 * 56 * 56)`` let a
        # wrongly sized input spill into the batch dimension (e.g. one
        # 448x448 image produced four output rows) instead of failing.
        out = torch.flatten(out, start_dim=1)
        out = self.relu3(self.fc1(out))
        out = self.fc2(out)
        return out
# Use CUDA when it is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

model = CNNModel()
# Move the model parameters to the selected device
model.to(device)

# Loss function and optimiser
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model
num_epochs = 10
for epoch in range(num_epochs):
    running_loss = 0.0
    for images, labels in train_loader:
        # Each batch must be on the same device as the model.
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # Report the mean per-batch loss of this epoch.
    print(f'Epoch {epoch + 1}, Loss: {running_loss / len(train_loader)}')

# Evaluate the model
softmax = nn.Softmax(dim=1)  # softmax over the class dimension
correct = 0
total = 0
all_probabilities = []  # per-sample class probabilities for the whole test set
with torch.no_grad():
    for images, labels in test_loader:
        # Each batch must be on the same device as the model.
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        # Turn the logits into probabilities; copy to the CPU before converting to NumPy.
        all_probabilities.extend(softmax(outputs).cpu().numpy())
        # The predicted class is the index of the largest logit.
        predicted = torch.max(outputs.data, 1)[1]
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print(f'Accuracy of the model on the test set: {100 * correct / total}%')

# Print the probabilities of the first few samples
print("Probabilities for the first few samples:")
for sample_no, sample_probs in enumerate(all_probabilities[:5], start=1):
    print(f"Sample {sample_no}: {sample_probs}")

end_time = time()
print("消耗时间={}".format(end_time - start_time))
CPU版本耗时:1310.6518561840057 秒
GPU版本耗时:70.60973024368286 秒
测试集正确率:100%