-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathhw9_training.py
More file actions
286 lines (246 loc) · 11.5 KB
/
hw9_training.py
File metadata and controls
286 lines (246 loc) · 11.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
# -*- coding: utf-8 -*-
"""hw9_training.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1yp_sZEMKtSRDIdPgOOEdSuvWHYZDH9AT
"""
#Import the necessary modules for the job
import time
from pycocotools.coco import COCO
import numpy as np
import matplotlib.pyplot as plt
import skimage.io as io
from PIL import Image
from torchvision import utils
from torch.utils.data import DataLoader
import os
import torch
from PIL import Image
from torchvision import transforms as tvt
import torch.nn as nn
from sklearn.metrics import confusion_matrix
import seaborn as sns
import torch.nn.functional as F
#Import the ViTHelper.py file
os.chdir("/content/drive/MyDrive/Purdue-First Year/BME 64600/hw9_RussellHo/")
from ViTHelper import MasterEncoder, SelfAttention, AttentionHead
#Class for Dataloader (found in hw4)
class MyDataset(torch.utils.data.Dataset):
def __init__(self, dataset):
super().__init__()
self.dataset = dataset
labels = []
for i in range(5):
labels += [i for j in range(1500)] #Append labels based on how data was loaded
self.labels = labels
def __len__(self):
# Return the total number of images
return len(self.dataset)
def __getitem__(self, index):
img = Image.open(self.dataset[index])
img_tensor = tvt.ToTensor()(img) #Convert Image to tensors (C x H x W)
#Ensure all images possess same channels (changing 1 channel images to 3)
if img_tensor.size()[0] == 1:
img_tensor = img_tensor.repeat(3, 1, 1)
# Apply transformations to the image
transform1 = tvt.RandomAffine(degrees = 30, translate = (0.2, 0.2))
transform2 = tvt.ColorJitter(brightness = (0.7, 1), saturation = (0.5, 1), contrast = (0.4, 1))
transform3 = tvt.RandomHorizontalFlip()
transform = tvt.Compose([transform1, transform2, transform3])
# Transform the non-oblique image
trans_tensor = transform(img_tensor)
int_label = self.labels[index]
# Return the tuple: (augmented tensor, integer label)
return trans_tensor, int_label
#Function to create dataset given list
def dataset_appender(dataset, imgIds, coco):
for entry in range(len(imgIds)): #Entry is an integer index
# print(f"Round {entry+1} of {len(imgIds)}")
img = coco.loadImgs(imgIds[entry])[0] #img here is a dictionary
I = os.path.join('train2014', img['file_name']) #Pass full path to the image file when appending to dataset
dataset.append(I)
return dataset
#Data Creator function (Dataset of 5 classes)
def datacreator():
#Section for initializing COCO API for instance annotations
os.chdir("/content/drive/MyDrive/Purdue-First Year/BME 64600/hw9_RussellHo")
# print(os.listdir())
dataType = 'train2014'
annFile = 'annotations/instances_{}.json'.format(dataType)
# initialize COCO api for instance annotations
coco=COCO(annFile)
# # display COCO categories and supercategories
# cats = coco.loadCats(coco.getCatIds())
# nms=[cat['name'] for cat in cats]
# print('COCO categories: \n{}\n'.format(' '.join(nms)))
# nms = set([cat['supercategory'] for cat in cats])
# print('COCO supercategories: \n{}'.format(' '.join(nms)))
# get all images containing given categories
categories_list = ['airplane', 'bus', 'cat', 'dog', 'pizza']
dataset = [] #Initializing an empty dataset
os.chdir('train2014/')
for k in range(len(categories_list)):
catIds = coco.getCatIds(catNms=categories_list[k]);
imgIds = coco.getImgIds(catIds=catIds ); #Initializing imgIds as a list
imgIds = imgIds[0:1500] #1500 training images for each category
dataset = dataset_appender(dataset, imgIds, coco)
# print(len(dataset)) #Dataset should contain 10000 images given the 2014train dataset
# #For loop for iterating over the 3 images in the given class
# list_images = []
# for j in range(3):
# img = Image.open(dataset[np.random.randint(0,len(dataset))]) #Selecting an image belonging to one of the categories at random
# list_images.append(img)
# f, axarr = plt.subplots(1, 3)
# axarr[0].imshow(list_images[0])
# plt.axis('off')
# axarr[1].imshow(list_images[1])
# plt.axis('off')
# axarr[2].imshow(list_images[2])
# plt.axis('off')
# plt.show()
# #Loading a random image from the dataset
# img = Image.open(dataset[np.random.randint(0,len(dataset))])
# img.show()
os.chdir("../")
return dataset
#The training function
def training(net1, mydataloader, device):
net = net1.to(device)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(
net.parameters(), lr = 1e-3, betas = (0.9, 0.99) #Learning rate here
)
epochs = 15 #Number of epochs
#Initialize loss graph
loss_graph = []
#Initialize iterations
iteration = 0
iterations = []
for epoch in range(epochs):
running_loss = 0.0
# For loop for mydataloader to process 7500 images (since each class has around 2000)
for count, batch in enumerate(mydataloader):
# print(f"{count+1} out of {int(7500/mydataloader.batch_size)} iterations complete")
inputs, labels = batch
inputs = inputs.to(device)
labels = labels.to(device)
optimizer.zero_grad()
outputs = net(inputs)
loss = criterion(outputs, labels)
loss.backward()
optimizer.step()
running_loss += loss.item()
if (count+1) % 10 == 0:
print("[epoch: %d, batch: %5d] loss: %.3f" % (epoch+1, count+1, running_loss/100))
loss_graph.append(running_loss/100) #Appending loss onto a list for graphing
running_loss = 0.0
iterations.append(iteration) #Appending number of iterations passed
iteration += 1
return loss_graph, iterations, net
#Method for evaluating the confusion matrix
def confusionmatrix(net, mydataloader, device, path):
#Set network to evaluation mode
net = net.eval()
correct = 0
total = 0
y_pred = []
y = []
# with torch.no_grad():
for data in mydataloader:
images, labels = data #Acquiring image tensors and their respective labels from dataloader
images = images.to(device)
labels = labels.to(device)
outputs = net(images)
_, predicted = torch.max(outputs.data, 1) #Only interested in the labels of the predicted images
total += labels.size(0)
correct += (predicted == labels).sum().item()
# print(predicted)
# print(labels)
for label,prediction in zip(labels,predicted):
y_pred.append(prediction) # list of predicted labels
y.append(label) # list of true labels
# print(y)
print('Accuracy of the network on the test images: %d %%' % (100* correct/total))
cf_matrix = confusion_matrix(y, y_pred) # create the confusion matrix
sns_plot = sns.heatmap(cf_matrix, annot=True, fmt='g', cbar=False) # use heatmap to demonstrate the confusion matrix
fig = sns_plot.get_figure()
#Saving the figure
plt.savefig(os.path.join(path, "confusion_matrix_ViT.jpg"))
plt.show()
class ViT(nn.Module):
def __init__(self, image_size, patch_size, num_channels, num_classes, embedding_size, num_heads, num_layers):
super().__init__()
# Calculate the number of patches and the max sequence length
num_patches = (image_size // patch_size) ** 2
seq_length = num_patches + 1
#Conv2D layer for embedding patches (use kernel size and stride to apply layer onto image without dividing into patches)
self.embedding_conv = nn.Conv2d(num_channels, embedding_size, kernel_size=patch_size, stride=patch_size)
#Initialize class token as a learnable parameter
self.class_token = nn.Parameter(torch.randn(1, 1, embedding_size))
# Setting position embeddings as learnable parameters
self.position_embeddings = nn.Parameter(torch.randn(1, seq_length, embedding_size)) #Dimensions (1, seq_length, embedding_size)
# Transformer encoder
self.encoder = MasterEncoder(seq_length, embedding_size, num_layers, num_heads)
# Final class prediction using the class token and an MLP layer
self.mlp = nn.Linear(embedding_size, num_classes) #Outputs class logits
def forward(self, x):
# Create embeddings using the Conv2D layer
embeddings = self.embedding_conv(x) #First apply Conv2D layer on input image (x)
embeddings = embeddings.view(embeddings.shape[0], -1, embeddings.shape[1]) #Reshape output tensor to have dimensions (batch_size, num_patches, embedding_size)
#Add class token to the sequence
batch_size = x.shape[0]
class_tokens = self.class_token.expand(batch_size, -1, -1) #Expand class token to match batch size
embeddings_with_token = torch.cat([class_tokens, embeddings], dim=1) #add class token to sequence
# Add position embeddings to sequence of embeddings with class token
embeddings_with_token_and_positions = embeddings_with_token + self.position_embeddings
# Pass through the Transformer encoder
transformer_output = self.encoder(embeddings_with_token_and_positions)
#Take the class token (extracted from the transformer's output sequence) and feed it through the MLP
class_token_output = transformer_output[:, 0]
logits = self.mlp(class_token_output) #Produces final Logits
return logits
#Making sure that the images have been resized into 64x64
def image_size():
#Alter the images that are not in 64x64
target_size = (64, 64)
os.chdir("/content/drive/MyDrive/Purdue-First Year/BME 64600/hw9_RussellHo/train2014")
image_count = 1
for image_name in os.listdir():
print(f"Altering {image_count} of {len(os.listdir())} images")
if image_name.lower().endswith(".jpg"):
image = Image.open(image_name)
if image.size != target_size:
new_image = image.resize(target_size)
new_image.save(image_name)
print("Resizing completed for one image")
image_count += 1
#Main Script
def main():
#Obtain 5 classes through datacreator ('airplane', 'bus', 'cat', 'dog', 'pizza')
dataset = datacreator() #directory changed into train2014 here
#Creating an instance from the dataloader class
my_dataset = MyDataset(dataset)
# Wrapping the Dataset within the DataLoader class
mydataloader = DataLoader(my_dataset, shuffle = True, batch_size = 100, num_workers = 1)
# os.chdir("train2014")
#First check if CUDA is available
use_cuda = torch.cuda.is_available()
print("Is CUDA computing available? " + str(use_cuda))
device = torch.device("cuda:0"if use_cuda else "cpu")
#Create instance from Vision Transformer class
vit_model = ViT(image_size=64, patch_size=16, num_channels=3, num_classes=5, embedding_size=256, num_heads=8, num_layers=6)
loss_graph, iterations, vit_model = training(vit_model, mydataloader, device)
#Saving the trained model
torch.save(vit_model.state_dict(), "vit_model.pth")
#Plot out the graph for loss
plt.plot(iterations, loss_graph)
plt.xlabel("Iterations")
plt.ylabel("Loss")
plt.title("Training Loss vs Iterations Vision Transformer")
#Saving the training loss plot
path = "/content/drive/MyDrive/Purdue-First Year/BME 64600/hw9_RussellHo"
plt.savefig(os.path.join(path, "ViT_train_loss.jpg"))
plt.show()
if __name__ == "__main__":
# image_size()
main()