import torch
import torch.nn as nn
import torch.nn.functional as F

def pgd(model, X, y, epsilon=8/255, alpha=2/255, steps=10, device='cpu'):
    """Craft adversarial examples with projected gradient descent (L-inf).

    Starting from a uniformly random point inside the ``epsilon`` box around
    ``X``, take ``steps`` signed-gradient ascent steps of size ``alpha`` on
    the cross-entropy loss. After every step the perturbation is clamped
    element-wise to ``[-epsilon, epsilon]`` and the result is clamped to
    ``[0, 1]``.

    The gradients stored on the model's parameters (``param.grad``) are left
    untouched: only the gradient with respect to the input is computed.

    Args:
        model: Callable mapping an input batch to the logits expected by
            ``nn.CrossEntropyLoss``.
        X: Clean input tensor. Results are clamped to ``[0, 1]``, so inputs
            are expected to live in that range.
        y: Targets passed to ``nn.CrossEntropyLoss`` together with the
            model outputs.
        epsilon: Maximum absolute per-element perturbation.
        alpha: Step size of each signed-gradient update.
        steps: Number of attack iterations. With ``steps=0`` the randomly
            initialised (and clamped) starting point is returned.
        device: Unused; kept for backward compatibility. All tensors are
            created on the device of ``X``.

    Returns:
        A detached tensor shaped like ``X`` holding the adversarial examples.
    """
    criterion = nn.CrossEntropyLoss()
    X_orig = X.detach()

    # Random start inside the epsilon box, kept within the valid input range.
    X_adv = X_orig.clone()
    X_adv = X_adv + torch.empty_like(X_adv).uniform_(-epsilon, epsilon)
    X_adv = torch.clamp(X_adv, min=0, max=1).detach()

    for _ in range(steps):
        X_adv.requires_grad_(True)

        cost = criterion(model(X_adv), y)
        # Differentiate w.r.t. the input only. Using cost.backward() together
        # with model.zero_grad() would wipe gradients the caller accumulated
        # on the parameters and leave attack gradients behind in their place.
        (grad,) = torch.autograd.grad(cost, X_adv)

        # Ascent step, then project back onto the epsilon box around X and
        # onto the valid input range.
        X_adv = X_adv.detach() + alpha * grad.sign()
        delta = torch.clamp(X_adv - X_orig, min=-epsilon, max=epsilon)
        X_adv = torch.clamp(X_orig + delta, min=0, max=1).detach()

    return X_adv