Skip to main content

attack_util.py

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

### PGD Attack
class PGDAttack():
def __init__(self, attack_step=10, eps=8 / 255, alpha=2 / 255, loss_type='ce', targeted=True,
num_classes=10,device='cuda:0'):
'''
attack_step: number of PGD iterations
eps: attack budget
alpha: PGD attack step size
'''
### Your code here
self.attack_step = attack_step
self.eps = eps
self.alpha = alpha
self.loss_func = self.ce_loss if loss_type == 'ce' else self.cw_loss
self.targeted = targeted
self.num_classes = num_classes
#added device, so that in the loss functions, the calculations won't cause errors (can call .to() to move tensor to device)
self.device = device
### Your code ends

def ce_loss(self, logits, y):
### Your code here

loss = 0.0
# convert y to a tensor of [batch size, num_classes]. for each photo (each element in the tensor array), it's a tensor such that it's 0 for classes other than the true class, and 1 for the true class.
y_transformed = torch.eye(self.num_classes)[y.view(-1)].float().to(self.device)
# print(y_transformed)
# implement CE loss function in slide. Since photos come in batch of 64, get the mean to convert loss to scalar.
# note: log_softmax gets the log of prediction probability
loss = -torch.mean(torch.sum(y_transformed * torch.nn.functional.log_softmax(logits,dim=1), dim=1))
return loss
### Your code ends

def cw_loss(self, logits, y):
### Your code here
#assume tao is 0
t = 0
if self.targeted:
# target is class 1
target_labels = torch.ones_like(y)
target_transformed = torch.eye(self.num_classes)[target_labels.view(-1)].float().to(self.device)

# get the largest logit that is not the target
largest, _ = torch.max((1-target_transformed)*logits, dim=1)
# get the target label logit. Select out ones that correspond to the target label
target = torch.masked_select(logits, target_transformed.bool())

# max among the difference between larget logit label and target, and -tao
# use sum to convert to a scalar for a batch
loss = -torch.clamp((largest-target),min=-t).sum()

return loss
else:
y_transformed = torch.eye(self.num_classes)[y.view(-1)].float().to(self.device)
# print(1-y_transformed)
# get the second largest logit that is not the true label
second_largest, _ = torch.max((1-y_transformed)*logits, dim=1)

# get the true label logit.
true_label = torch.masked_select(logits, y_transformed.bool())
# print("largest")
# print(largest)

# max among the difference between true label and second largest, and -tao
loss = -torch.clamp((true_label-second_largest),min=-t).sum()

return loss
### Your code ends

def perturb(self, model: nn.Module, X, y):
delta = torch.zeros_like(X)

### Your code here

loss_fn = self.loss_func
X.requires_grad = True
# print(X.shape)
# gradient descent for the specified steps
for i in range(self.attack_step):
model.zero_grad()
logits = model(X + delta)
# print(logits)
# print(logits.shape)
# print(y.shape)
# print(y)
# loss = torch.nn.functional.cross_entropy(logits, y)
loss = loss_fn(logits,y)

# get gradient
loss.backward()
# print(logits[:,0]*logits[:,0].shape)
#print(logits)

#descent using sign-based gradient descent
delta = delta + self.alpha*torch.sign(X.grad)
# projection step. For l infinity step, each pixel in delta hat is either unchanged if its absolute value is less than eps, or is equal to epsilon if it's positive, or is equal to negative epsilon if it's negative.
delta = torch.clamp(delta, min=-self.eps,max=self.eps)
X.grad.zero_()

#need to be a valid image
X_adv = delta+X
X_adv = torch.clamp(X_adv,min=0,max=1)
delta = X_adv-X
### Your code ends

return delta


### FGSMAttack
'''
Technically you can transform your PGDAttack to FGSM Attack by controling parameters like `attack_step`.
If you do that, you do not need to implement FGSM in this class.
'''
class FGSMAttack():
def __init__(self, eps=8 / 255, loss_type='ce', targeted=True, num_classes=10):
pass

def perturb(self, model: nn.Module, X, y):
delta = torch.ones_like(X)
### Your code here

### Your code ends
return delta