{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"provenance":[],"authorship_tag":"ABX9TyMkProzHFiTdBu39T7eaRyJ"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"code","execution_count":5,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"41y01KnOpbUj","executionInfo":{"status":"ok","timestamp":1727813043014,"user_tz":240,"elapsed":7285,"user":{"displayName":"胡平波","userId":"14290925349410595096"}},"outputId":"46dc6d5f-7537-4bdf-df8c-850d6158563b"},"outputs":[{"output_type":"stream","name":"stdout","text":["cp: target 'code' is not a directory\n","mkdir: cannot create directory ‘CheXpert’: File exists\n","unzip:  cannot find or open CheXpert-v1.0-small.zip, CheXpert-v1.0-small.zip.zip or CheXpert-v1.0-small.zip.ZIP.\n","Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n","Requirement already satisfied: libauc==1.2.0 in /usr/local/lib/python3.10/dist-packages (1.2.0)\n","Requirement already satisfied: torch>=1.2 in /usr/local/lib/python3.10/dist-packages (from libauc==1.2.0) (2.4.1+cu121)\n","Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from libauc==1.2.0) (1.26.4)\n","Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from libauc==1.2.0) (2.2.2)\n","Requirement already satisfied: Pillow in /usr/local/lib/python3.10/dist-packages (from libauc==1.2.0) (10.4.0)\n","Requirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (from libauc==1.2.0) (1.5.2)\n","Requirement already satisfied: scikit-image in /usr/local/lib/python3.10/dist-packages (from libauc==1.2.0) (0.24.0)\n","Requirement already satisfied: opencv-python in /usr/local/lib/python3.10/dist-packages (from libauc==1.2.0) (4.10.0.84)\n","Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from torch>=1.2->libauc==1.2.0) (3.16.1)\n","Requirement already satisfied: typing-extensions>=4.8.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.2->libauc==1.2.0) (4.12.2)\n","Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.2->libauc==1.2.0) (1.13.3)\n","Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.2->libauc==1.2.0) (3.3)\n","Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.2->libauc==1.2.0) (3.1.4)\n","Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from torch>=1.2->libauc==1.2.0) (2024.6.1)\n","Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->libauc==1.2.0) (2.8.2)\n","Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->libauc==1.2.0) (2024.2)\n","Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/dist-packages (from pandas->libauc==1.2.0) (2024.2)\n","Requirement already satisfied: scipy>=1.9 in /usr/local/lib/python3.10/dist-packages (from scikit-image->libauc==1.2.0) (1.13.1)\n","Requirement already satisfied: imageio>=2.33 in /usr/local/lib/python3.10/dist-packages (from scikit-image->libauc==1.2.0) (2.35.1)\n","Requirement already satisfied: tifffile>=2022.8.12 in /usr/local/lib/python3.10/dist-packages (from scikit-image->libauc==1.2.0) (2024.9.20)\n","Requirement already satisfied: packaging>=21 in /usr/local/lib/python3.10/dist-packages (from scikit-image->libauc==1.2.0) (24.1)\n","Requirement already satisfied: lazy-loader>=0.4 in /usr/local/lib/python3.10/dist-packages (from scikit-image->libauc==1.2.0) (0.4)\n","Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->libauc==1.2.0) (1.4.2)\n","Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->libauc==1.2.0) (3.5.0)\n","Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas->libauc==1.2.0) (1.16.0)\n","Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.2->libauc==1.2.0) (2.1.5)\n","Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.2->libauc==1.2.0) (1.3.0)\n"]}],"source":["!cp /content/drive/MyDrive/ChestDataset/CheXpert-v1.0-small.zip /content/ICLR code\n","!mkdir CheXpert\n","!unzip CheXpert-v1.0-small.zip -d /content/CheXpert/\n","from google.colab import drive\n","drive.mount('/content/drive')\n","!pip install libauc==1.2.0\n"]},{"cell_type":"code","source":["import numpy as np\n","import torch\n","from torch.utils.data import DataLoader, Subset, Dataset\n","import torch.nn.functional as F\n","import torchvision.transforms as tfs\n","import cv2\n","from PIL import Image\n","import pandas as pd\n","\n","from libauc.losses import AUCMLoss, CrossEntropyLoss\n","from libauc.optimizers import PESG, Adam\n","from libauc.models import densenet121, densenet161, densenet169, densenet201\n","from libauc.models import resnet101, resnet152, resnet18, resnet34, resnet50, resnext101_32x8d, resnext50_32x4d, wide_resnet101_2, wide_resnet50_2\n","from libauc.datasets import CheXpert\n","from scipy.stats import bernoulli\n","import datetime\n","import os\n","import sys\n","import time\n","import random\n","import shutil\n","import numpy as np\n","from collections import Counter\n","from tqdm import tqdm, trange\n","\n","#import torchvision.transforms as transforms\n","from sklearn.metrics import roc_auc_score, accuracy_score, f1_score, precision_score, recall_score\n","from sklearn.model_selection import train_test_split\n","\n"],"metadata":{"id":"dZlIDiBMp1rG","executionInfo":{"status":"ok","timestamp":1727812986042,"user_tz":240,"elapsed":24508,"user":{"displayName":"胡平波","userId":"14290925349410595096"}}},"execution_count":2,"outputs":[]},{"cell_type":"code","source":["\n","class CheXpert(Dataset):\n","    '''\n","    Reference:\n","        @inproceedings{yuan2021robust,\n","            title={Large-scale Robust Deep AUC Maximization: A New Surrogate Loss and Empirical Studies on Medical Image Classification},\n","            author={Yuan, Zhuoning and Yan, Yan and Sonka, Milan and Yang, Tianbao},\n","            booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision},\n","            year={2021}\n","            }\n","    '''\n","\n","    def __init__(self,\n","                 csv_path,\n","                 image_root_path='',\n","                 image_size=320,\n","                 class_index=0,\n","                 use_frontal=True,\n","                 use_upsampling=True,\n","                 flip_label=False,\n","                 shuffle=True,\n","                 seed=123,\n","                 verbose=True,\n","                 upsampling_cols=['Cardiomegaly', 'Consolidation'],\n","                 train_cols=['Cardiomegaly', 'Edema', 'Consolidation', 'Atelectasis', 'Pleural Effusion'],\n","                 mode='train'):\n","\n","        # load data from csv\n","        self.df = pd.read_csv(csv_path)\n","        self.df['Path'] = self.df['Path'].str.replace('CheXpert-v1.0-small/', '')\n","        self.df['Path'] = self.df['Path'].str.replace('CheXpert-v1.0/', '')\n","        if use_frontal:\n","            self.df = self.df[self.df['Frontal/Lateral'] == 'Frontal']\n","\n","            #### upsample selected cols (Why?)\n","        if use_upsampling:\n","            assert isinstance(upsampling_cols, list), 'Input should be list!'\n","            sampled_df_list = []\n","            for col in upsampling_cols:\n","                #print('Upsampling %s...' % col)\n","                sampled_df_list.append(self.df[self.df[col] == 1])\n","            self.df = pd.concat([self.df] + sampled_df_list, axis=0)\n","\n","        # impute missing values\n","        for col in train_cols:\n","            if col in ['Edema', 'Atelectasis']:\n","                self.df[col].replace(-1, 1, inplace=True)\n","                self.df[col].fillna(0, inplace=True)\n","            elif col in ['Cardiomegaly', 'Consolidation', 'Pleural Effusion']:\n","                self.df[col].replace(-1, 0, inplace=True)\n","                self.df[col].fillna(0, inplace=True)\n","            else:\n","                self.df[col].fillna(0, inplace=True)\n","\n","        self._num_images = len(self.df)\n","\n","        # 0 --> -1\n","        if flip_label and class_index != -1:  # In multi-class mode we disable this option!\n","            self.df.replace(0, -1, inplace=True)\n","\n","\n","            # shuffle data\n","        if shuffle:\n","            data_index = list(range(self._num_images))\n","            np.random.seed(seed)\n","            np.random.shuffle(data_index)\n","            self.df = self.df.iloc[data_index]\n","\n","        assert class_index in [-1, 0, 1, 2, 3, 4], 'Out of selection!'\n","        assert image_root_path != '', 'You need to pass the correct location for the dataset!'\n","\n","        if class_index == -1:  # 5 classes\n","            #print('Multi-label mode: True, Number of classes: [%d]' % len(train_cols))\n","            self.select_cols = train_cols\n","            self.value_counts_dict = {}\n","            for class_key, select_col in enumerate(train_cols):\n","                class_value_counts_dict = self.df[select_col].value_counts().to_dict()\n","                self.value_counts_dict[class_key] = class_value_counts_dict\n","        else:  # 1 class\n","            self.select_cols = [train_cols[class_index]]  # this var determines the number of classes\n","            self.value_counts_dict = self.df[self.select_cols[0]].value_counts().to_dict()\n","\n","        self.mode = mode\n","        self.class_index = class_index\n","        self.image_size = image_size\n","\n","        self._images_list = [image_root_path + path for path in self.df['Path'].tolist()]\n","        if class_index != -1:\n","            self._labels_list = self.df[train_cols].values[:, class_index].tolist()\n","        else:\n","            self._labels_list = self.df[train_cols].values.tolist()\n","\n","        if verbose:\n","            if class_index != -1:\n","                #print('-' * 30)\n","                if flip_label:\n","                    self.imratio = self.value_counts_dict[1] / (self.value_counts_dict[-1] + self.value_counts_dict[1])\n","                    #print('Found %s images in total, %s positive images, %s negative images' % (\n","                    #self._num_images, self.value_counts_dict[1], self.value_counts_dict[-1]))\n","                    #print('%s(C%s): imbalance ratio is %.4f' % (self.select_cols[0], class_index, self.imratio))\n","                else:\n","                    self.imratio = self.value_counts_dict[1] / (self.value_counts_dict[0] + self.value_counts_dict[1])\n","                    #print('Found %s images in total, %s positive images, %s negative images' % (\n","                    #self._num_images, self.value_counts_dict[1], self.value_counts_dict[0]))\n","                    #print('%s(C%s): imbalance ratio is %.4f' % (self.select_cols[0], class_index, self.imratio))\n","                #print('-' * 30)\n","            else:\n","                #print('-' * 30)\n","                imratio_list = []\n","                for class_key, select_col in enumerate(train_cols):\n","                    imratio = self.value_counts_dict[class_key][1] / (\n","                                self.value_counts_dict[class_key][0] + self.value_counts_dict[class_key][1])\n","                    imratio_list.append(imratio)\n","                    #print('Found %s images in total, %s positive images, %s negative images' % (\n","                    #self._num_images, self.value_counts_dict[class_key][1], self.value_counts_dict[class_key][0]))\n","                    #print('%s(C%s): imbalance ratio is %.4f' % (select_col, class_key, imratio))\n","                    #print()\n","                self.imratio = np.mean(imratio_list)\n","                self.imratio_list = imratio_list\n","                #print('-' * 30)\n","\n","    @property\n","    def class_counts(self):\n","        return self.value_counts_dict\n","\n","    @property\n","    def imbalance_ratio(self):\n","        return self.imratio\n","\n","    @property\n","    def num_classes(self):\n","        return len(self.select_cols)\n","\n","    @property\n","    def data_size(self):\n","        return self._num_images\n","\n","    def image_augmentation(self, image):\n","        ##what is this meaning, why do we need to do this kind of transformation?\n","        img_aug = tfs.Compose([tfs.RandomAffine(degrees=(-15, 15), translate=(0.05, 0.05), scale=(0.95, 1.05),\n","                                                fill=128)])  # pytorch 3.7: fillcolor --> fill\n","        image = img_aug(image)\n","        return image\n","\n","    def __len__(self):\n","        return self._num_images\n","\n","    def __getitem__(self, idx):\n","\n","        image = cv2.imread(self._images_list[idx], 0)\n","        ##transform array to PIL image\n","        image = Image.fromarray(image)\n","        ##why we only do transformation for pictures in train set?\n","        if self.mode == 'train':\n","            image = self.image_augmentation(image)\n","        image = np.array(image)\n","        ##transform one channel gray picture to three-channel RGB pictures, why? using one channel gray picture (that is, a matrix) may be more simple?\n","        image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)\n","\n","        # resize and normalize; e.g., ToTensor()\n","        image = cv2.resize(image, dsize=(self.image_size, self.image_size), interpolation=cv2.INTER_LINEAR)\n","        image = image / 255.0\n","        __mean__ = np.array([[[0.485, 0.456, 0.406]]])\n","        __std__ = np.array([[[0.229, 0.224, 0.225]]])\n","        image = (image - __mean__) / __std__\n","        image = image.transpose((2, 0, 1)).astype(np.float32)\n","        ##why use float32? what is reshape(-1)?\n","        if self.class_index != -1:  # multi-class mode\n","            label = np.array(self._labels_list[idx]).reshape(-1).astype(np.float32)\n","        else:\n","            label = np.array(self._labels_list[idx]).reshape(-1).astype(np.float32)\n","\n","        return image, label\n","\n"],"metadata":{"id":"G6otqkh3p2WP","executionInfo":{"status":"ok","timestamp":1727812986043,"user_tz":240,"elapsed":8,"user":{"displayName":"胡平波","userId":"14290925349410595096"}}},"execution_count":3,"outputs":[]},{"cell_type":"code","source":["def GenerateNoise(meanNoise, varianceNoise, SEED, numSample):\n","  torch.manual_seed(SEED)\n","  mean = meanNoise\n","  cov = varianceNoise  # Identity matrix for covariance (can be changed if needed)\n","\n","  # Generate samples from the multivariate normal distribution\n","  normal_samples = torch.distributions.MultivariateNormal(mean, cov).sample((numSample, 224, 224))\n","\n","  # Permute dimensions to get the desired shape (32, 224, 224, 3) to (32, 3, 224, 224)\n","  noise = normal_samples.permute(0, 3, 1, 2)\n","  return noise\n","\n","def set_all_seeds(SEED):\n","    # REPRODUCIBILITY\n","    torch.manual_seed(SEED)\n","    np.random.seed(SEED)\n","    torch.backends.cudnn.deterministic = True\n","    torch.backends.cudnn.benchmark = False\n","\n","# paramaters\n","SEED = 123\n","BATCH_SIZE = 32\n","lr = 1e-4\n","weight_decay = 1e-5\n","epochs = 20\n","\n","meanNoise = torch.tensor([1.5, 1, 0.9])\n","\n","#meanNoise = torch.tensor([0.8, 0.7, 0.6])\n","\n","varianceNoise = torch.eye(3)\n","\n","# dataloader\n","root = '/content/CheXpert/'\n","\n","class_id = 1\n","\n","##explanation for true_test_result and true_valid_result:\n","## the first layer represent the data for class_idx; the second layer represents the data for [p0, p1];\n","##the third layer represents the data for beta\n","## for example, we want to find the metrics for Cardiomegaly when p0=p1=0.55 and beta = -5, we should use\n","##true_test_result[0][0][0], true_valid_result[0][0][0]\n","\n","\n","\n","#beta_value_list_under_different_p0p1 = []\n","#for [p0, p1] in misclass_value_list:\n","#  beta_value_list_under_different_p0p1.append([])\n","#  p0p1_index = misclass_value_list.index([p0, p1])\n","#  beta_val_list = np.linspace(-5, min(p0, p1), 20)\n","#  for beta in beta_val_list:\n","#    beta_value_list_under_different_p0p1[p0p1_index].append(beta)\n","\n","\n","\n","  #class_id: 0:Cardiomegaly, 1:Edema, 2:Consolidation, 3:Atelectasis, 4:Pleural Effusion\n","# Index: -1 denotes multi-label mode including 5 diseases\n","\n","## set values of p0 and p1, where true p0 = 1-p0\n","\n","dataset = CheXpert(csv_path=root + 'valid.csv', image_root_path=root, use_upsampling=False, use_frontal=True,\n","                   image_size=224, mode='valid', class_index=class_id)\n","\n","datasetLoader = torch.utils.data.DataLoader(dataset, batch_size=10, num_workers=2, shuffle=False)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":356},"id":"OWqZZni6p4wc","executionInfo":{"status":"error","timestamp":1727812986465,"user_tz":240,"elapsed":427,"user":{"displayName":"胡平波","userId":"14290925349410595096"}},"outputId":"4fc025c8-97c8-4d42-9765-207bcdb270fc"},"execution_count":4,"outputs":[{"output_type":"error","ename":"FileNotFoundError","evalue":"[Errno 2] No such file or directory: '/content/CheXpert/valid.csv'","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)","\u001b[0;32m<ipython-input-4-770c6256212a>\u001b[0m in \u001b[0;36m<cell line: 61>\u001b[0;34m()\u001b[0m\n\u001b[1;32m     59\u001b[0m \u001b[0;31m## set values of p0 and p1, where true p0 = 1-p0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     60\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 61\u001b[0;31m dataset = CheXpert(csv_path=root + 'valid.csv', image_root_path=root, use_upsampling=False, use_frontal=True,\n\u001b[0m\u001b[1;32m     62\u001b[0m                    image_size=224, mode='valid', class_index=class_id)\n\u001b[1;32m     63\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m<ipython-input-3-3fc513af7dc3>\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, csv_path, image_root_path, image_size, class_index, use_frontal, use_upsampling, flip_label, shuffle, seed, verbose, upsampling_cols, train_cols, mode)\u001b[0m\n\u001b[1;32m     26\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     27\u001b[0m         \u001b[0;31m# load data from csv\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 28\u001b[0;31m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdf\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_csv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcsv_path\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     29\u001b[0m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Path'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Path'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreplace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'CheXpert-v1.0-small/'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m''\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     30\u001b[0m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Path'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'Path'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreplace\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'CheXpert-v1.0/'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m''\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/pandas/io/parsers/readers.py\u001b[0m in \u001b[0;36mread_csv\u001b[0;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[0m\n\u001b[1;32m   1024\u001b[0m     \u001b[0mkwds\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkwds_defaults\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1025\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1026\u001b[0;31m     \u001b[0;32mreturn\u001b[0m \u001b[0m_read\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1027\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1028\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/pandas/io/parsers/readers.py\u001b[0m in \u001b[0;36m_read\u001b[0;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[1;32m    618\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    619\u001b[0m     \u001b[0;31m# Create the parser.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 620\u001b[0;31m     \u001b[0mparser\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mTextFileReader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath_or_buffer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    621\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    622\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0mchunksize\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0miterator\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/pandas/io/parsers/readers.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[1;32m   1618\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1619\u001b[0m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhandles\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mIOHandles\u001b[0m \u001b[0;34m|\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1620\u001b[0;31m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_engine\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_make_engine\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mengine\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1621\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1622\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/pandas/io/parsers/readers.py\u001b[0m in \u001b[0;36m_make_engine\u001b[0;34m(self, f, engine)\u001b[0m\n\u001b[1;32m   1878\u001b[0m                 \u001b[0;32mif\u001b[0m \u001b[0;34m\"b\"\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1879\u001b[0m                     \u001b[0mmode\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;34m\"b\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1880\u001b[0;31m             self.handles = get_handle(\n\u001b[0m\u001b[1;32m   1881\u001b[0m                 \u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1882\u001b[0m                 \u001b[0mmode\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/usr/local/lib/python3.10/dist-packages/pandas/io/common.py\u001b[0m in \u001b[0;36mget_handle\u001b[0;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[1;32m    871\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mioargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mencoding\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;34m\"b\"\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mioargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmode\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    872\u001b[0m             \u001b[0;31m# Encoding\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 873\u001b[0;31m             handle = open(\n\u001b[0m\u001b[1;32m    874\u001b[0m                 \u001b[0mhandle\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    875\u001b[0m                 \u001b[0mioargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmode\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '/content/CheXpert/valid.csv'"]}]},{"cell_type":"code","source":["\n","\n","### true method\n","\n","\n","\n","def TrueMethod(datasetLoader, meanNoise, varianceNoise, SEED, lr, weight_decay, epochs):\n","  set_all_seeds(SEED)\n","\n","  model = densenet121(pretrained=True, last_activation=None, activations='relu', num_classes=1)\n","  model = model.cuda()\n","\n","\n","  # define loss & optimizer\n","  CELoss = CrossEntropyLoss()\n","  optimizer = Adam(model.parameters(), lr=lr, weight_decay=weight_decay)\n","\n","  # training\n","  best_val_auc = 0\n","  best_val_acc = 0\n","  best_val_precision = 0\n","  best_val_recall = 0\n","  best_val_f1score = 0\n","\n","  for epoch in range(epochs):\n","      for idx, data in enumerate(datasetLoader):\n","        if idx < 20:\n","          noisyInput, label = data\n","          shape = noisyInput.shape\n","          #print(shape)\n","          numSample = shape[0]\n","          #print(numSample)\n","          noise = GenerateNoise(meanNoise, varianceNoise, idx, numSample)\n","\n","          trueInput = noisyInput + noise\n","          noisyInput, trueInput, label = noisyInput.cuda(), trueInput.cuda(), label.cuda()\n","          y_pred = model(trueInput)\n","          loss = CELoss(y_pred, label)\n","          optimizer.zero_grad()\n","          loss.backward()\n","          optimizer.step()\n","\n","          # validation\n","          if idx % 5 == 0:\n","              model.eval()\n","              with torch.no_grad():\n","                  test_pred = []\n","                  test_true = []\n","                  test_pred_label = []\n","                  for jdx, data in enumerate(datasetLoader):\n","                      noisyInput, label = data\n","                      shape = noisyInput.shape\n","                      numSample = shape[0]\n","                      noise = GenerateNoise(meanNoise, varianceNoise, jdx, numSample)\n","                      trueInput = noisyInput + noise\n","                      noisyInput, trueInput = noisyInput.cuda(), trueInput.cuda()\n","                      y_pred = model(trueInput)\n","                      y_pred_label = torch.sigmoid(y_pred) >= 0.5\n","                      test_pred_label.append(y_pred_label.cpu().detach().numpy())\n","                      test_pred.append(y_pred.cpu().detach().numpy())\n","                      test_true.append(label.numpy())\n","\n","                  test_true = np.concatenate(test_true)\n","                  test_pred = np.concatenate(test_pred)\n","                  test_pred_label = np.concatenate(test_pred_label)\n","                  val_auc_mean = roc_auc_score(test_true, test_pred)\n","                  #print(test_pred)\n","                  val_acc_mean = accuracy_score(test_true, test_pred_label)\n","                  val_precision_mean = precision_score(test_true, test_pred_label)\n","                  val_recall_mean = recall_score(test_true, test_pred_label)\n","                  val_f1score_mean = f1_score(test_true, test_pred_label)\n","                  model.train()\n","\n","\n","                  if best_val_auc < val_auc_mean:\n","                      best_val_auc = val_auc_mean\n","                      #torch.save(model.state_dict(), 'ce_pretrained_model.pth')\n","                  if best_val_acc < val_acc_mean:\n","                      best_val_acc = val_acc_mean\n","\n","                  if best_val_precision < val_precision_mean:\n","                      best_val_precision = val_precision_mean\n","\n","                  if best_val_recall < val_recall_mean:\n","                      best_val_recall = val_recall_mean\n","\n","                  if best_val_f1score < val_f1score_mean:\n","                      best_val_f1score = val_f1score_mean\n","\n","\n","                  #print(\n","                  #  'Epoch=%s, BatchID=%s, Val_AUC=%.4f, Best_Val_AUC=%.4f' % (epoch, idx, val_auc_mean, best_val_auc))\n","                  #print(\n","                  #  'Epoch=%s, BatchID=%s, Val_ACC=%.4f, Best_Val_ACC=%.4f' % (epoch, idx, val_acc_mean, best_val_acc))\n","                  #print(\n","                  #  'Epoch=%s, BatchID=%s, Val_Precision=%.4f, Best_Val_Precision=%.4f' % (epoch, idx, val_precision_mean, best_val_precision))\n","                  #print(\n","                  #  'Epoch=%s, BatchID=%s, Val_Recall=%.4f, Best_Val_Recall=%.4f' % (epoch, idx, val_recall_mean, best_val_recall))\n","                  #print(\n","                  #  'Epoch=%s, BatchID=%s, Val_f1score=%.4f, Best_Val_f1score=%.4f' % (epoch, idx, val_f1score_mean, best_val_f1score))\n","\n","  true_test_result = [best_val_auc, best_val_acc, best_val_precision, best_val_recall,best_val_f1score]\n","  return true_test_result\n","\n","\n"],"metadata":{"id":"880jdTakp-8n"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["###naive method\n","\n","def NaiveMethod(datasetLoader, meanNoise, varianceNoise, SEED, lr, weight_decay, epochs):\n","  set_all_seeds(SEED)\n","\n","  model = densenet121(pretrained=True, last_activation=None, activations='relu', num_classes=1)\n","  model = model.cuda()\n","\n","  # define loss & optimizer\n","  CELoss = CrossEntropyLoss()\n","  optimizer = Adam(model.parameters(), lr=lr, weight_decay=weight_decay)\n","\n","  # training\n","  best_val_auc = 0\n","  best_val_acc = 0\n","  best_val_precision = 0\n","  best_val_recall = 0\n","  best_val_f1score = 0\n","\n","  for epoch in range(epochs):\n","      for idx, data in enumerate(datasetLoader):\n","        if idx < 20:\n","          noisyInput, label = data\n","          shape = noisyInput.shape\n","          numSample = shape[0]\n","          noise = GenerateNoise(meanNoise, varianceNoise, idx, numSample)\n","\n","          trueInput = noisyInput + noise\n","          noisyInput, trueInput, label = noisyInput.cuda(), trueInput.cuda(), label.cuda()\n","          y_pred = model(noisyInput)\n","          loss = CELoss(y_pred, label)\n","          optimizer.zero_grad()\n","          loss.backward()\n","          optimizer.step()\n","\n","          # validation\n","          if idx % 5 == 0:\n","              model.eval()\n","              with torch.no_grad():\n","                  test_pred = []\n","                  test_true = []\n","                  test_pred_label = []\n","                  for jdx, data in enumerate(datasetLoader):\n","                      noisyInput, label = data\n","                      shape = noisyInput.shape\n","                      numSample = shape[0]\n","                      noise = GenerateNoise(meanNoise, varianceNoise, jdx, numSample)\n","                      trueInput = noisyInput + noise\n","                      noisyInput, trueInput = noisyInput.cuda(), trueInput.cuda()\n","                      y_pred = model(trueInput)\n","                      y_pred_label = torch.sigmoid(y_pred) >= 0.5\n","                      test_pred_label.append(y_pred_label.cpu().detach().numpy())\n","                      test_pred.append(y_pred.cpu().detach().numpy())\n","                      test_true.append(label.numpy())\n","\n","                  test_true = np.concatenate(test_true)\n","                  test_pred = np.concatenate(test_pred)\n","                  test_pred_label = np.concatenate(test_pred_label)\n","                  val_auc_mean = roc_auc_score(test_true, test_pred)\n","                  #print(test_pred)\n","                  val_acc_mean = accuracy_score(test_true, test_pred_label)\n","                  val_precision_mean = precision_score(test_true, test_pred_label)\n","                  val_recall_mean = recall_score(test_true, test_pred_label)\n","                  val_f1score_mean = f1_score(test_true, test_pred_label)\n","                  model.train()\n","\n","\n","                  if best_val_auc < val_auc_mean:\n","                      best_val_auc = val_auc_mean\n","                      #torch.save(model.state_dict(), 'ce_pretrained_model.pth')\n","                  if best_val_acc < val_acc_mean:\n","                      best_val_acc = val_acc_mean\n","\n","                  if best_val_precision < val_precision_mean:\n","                      best_val_precision = val_precision_mean\n","\n","                  if best_val_recall < val_recall_mean:\n","                      best_val_recall = val_recall_mean\n","\n","                  if best_val_f1score < val_f1score_mean:\n","                      best_val_f1score = val_f1score_mean\n","\n","\n","                  #print(\n","                  #  'Epoch=%s, BatchID=%s, Val_AUC=%.4f, Best_Val_AUC=%.4f' % (epoch, idx, val_auc_mean, best_val_auc))\n","                  #print(\n","                  #  'Epoch=%s, BatchID=%s, Val_ACC=%.4f, Best_Val_ACC=%.4f' % (epoch, idx, val_acc_mean, best_val_acc))\n","                  #print(\n","                  #  'Epoch=%s, BatchID=%s, Val_Precision=%.4f, Best_Val_Precision=%.4f' % (epoch, idx, val_precision_mean, best_val_precision))\n","                  #print(\n","                  #  'Epoch=%s, BatchID=%s, Val_Recall=%.4f, Best_Val_Recall=%.4f' % (epoch, idx, val_recall_mean, best_val_recall))\n","                  #print(\n","                  #  'Epoch=%s, BatchID=%s, Val_f1score=%.4f, Best_Val_f1score=%.4f' % (epoch, idx, val_f1score_mean, best_val_f1score))\n","\n","  naive_test_result = [best_val_auc, best_val_acc, best_val_precision, best_val_recall,best_val_f1score]\n","  return naive_test_result\n","\n","\n"],"metadata":{"id":"WcSTEOCFqBgJ"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["def AugmentedCorrectionMethod(datasetLoader, meanNoise, varianceNoise, SEED, lr, weight_decay, epochs, ratioNoisyInput):\n","  set_all_seeds(SEED)\n","\n","  model = densenet121(pretrained=True, last_activation=None, activations='relu', num_classes=1)\n","  model = model.cuda()\n","\n","\n","  # define loss & optimizer\n","  CELoss = CrossEntropyLoss()\n","  optimizer = Adam(model.parameters(), lr=lr, weight_decay=weight_decay)\n","\n","  # training\n","  best_val_auc = 0\n","  best_val_acc = 0\n","  best_val_precision = 0\n","  best_val_recall = 0\n","  best_val_f1score = 0\n","\n","  for epoch in range(epochs):\n","      for idx, data in enumerate(datasetLoader):\n","        if idx < 20:\n","          noisyInput, label = data\n","          shape = noisyInput.shape\n","          numSample = shape[0]\n","          noise = GenerateNoise(meanNoise, varianceNoise, idx, numSample)\n","\n","          trueInput = noisyInput + noise\n","          noisyInput, trueInput, label = noisyInput.cuda(), trueInput.cuda(), label.cuda()\n","          if idx < ratioNoisyInput*20:\n","            y_pred = model(noisyInput)\n","          else:\n","            y_pred = model(trueInput)\n","          loss = CELoss(y_pred, label)\n","          optimizer.zero_grad()\n","          loss.backward()\n","          optimizer.step()\n","\n","          # validation\n","          if idx % 5 == 0:\n","              model.eval()\n","              with torch.no_grad():\n","                  test_pred = []\n","                  test_true = []\n","                  test_pred_label = []\n","                  for jdx, data in enumerate(datasetLoader):\n","                      noisyInput, label = data\n","                      shape = noisyInput.shape\n","                      numSample = shape[0]\n","                      noise = GenerateNoise(meanNoise, varianceNoise, jdx, numSample)\n","                      trueInput = noisyInput + noise\n","                      noisyInput, trueInput = noisyInput.cuda(), trueInput.cuda()\n","                      y_pred = model(trueInput)\n","                      y_pred_label = torch.sigmoid(y_pred) >= 0.5\n","                      test_pred_label.append(y_pred_label.cpu().detach().numpy())\n","                      test_pred.append(y_pred.cpu().detach().numpy())\n","                      test_true.append(label.numpy())\n","\n","                  test_true = np.concatenate(test_true)\n","                  test_pred = np.concatenate(test_pred)\n","                  test_pred_label = np.concatenate(test_pred_label)\n","                  val_auc_mean = roc_auc_score(test_true, test_pred)\n","                  #print(test_pred)\n","                  val_acc_mean = accuracy_score(test_true, test_pred_label)\n","                  val_precision_mean = precision_score(test_true, test_pred_label)\n","                  val_recall_mean = recall_score(test_true, test_pred_label)\n","                  val_f1score_mean = f1_score(test_true, test_pred_label)\n","                  model.train()\n","\n","\n","                  if best_val_auc < val_auc_mean:\n","                      best_val_auc = val_auc_mean\n","                      #torch.save(model.state_dict(), 'ce_pretrained_model.pth')\n","                  if best_val_acc < val_acc_mean:\n","                      best_val_acc = val_acc_mean\n","\n","                  if best_val_precision < val_precision_mean:\n","                      best_val_precision = val_precision_mean\n","\n","                  if best_val_recall < val_recall_mean:\n","                      best_val_recall = val_recall_mean\n","\n","                  if best_val_f1score < val_f1score_mean:\n","                      best_val_f1score = val_f1score_mean\n","\n","\n","                  #print(\n","                  #  'Epoch=%s, BatchID=%s, Val_AUC=%.4f, Best_Val_AUC=%.4f' % (epoch, idx, val_auc_mean, best_val_auc))\n","                  #print(\n","                  #  'Epoch=%s, BatchID=%s, Val_ACC=%.4f, Best_Val_ACC=%.4f' % (epoch, idx, val_acc_mean, best_val_acc))\n","                  #print(\n","                  #  'Epoch=%s, BatchID=%s, Val_Precision=%.4f, Best_Val_Precision=%.4f' % (epoch, idx, val_precision_mean, best_val_precision))\n","                  #print(\n","                  #  'Epoch=%s, BatchID=%s, Val_Recall=%.4f, Best_Val_Recall=%.4f' % (epoch, idx, val_recall_mean, best_val_recall))\n","                  #print(\n","                  #  'Epoch=%s, BatchID=%s, Val_f1score=%.4f, Best_Val_f1score=%.4f' % (epoch, idx, val_f1score_mean, best_val_f1score))\n","\n","  correction_test_result = [best_val_auc, best_val_acc, best_val_precision, best_val_recall,best_val_f1score]\n","  return correction_test_result\n","\n"],"metadata":{"id":"NXEKmxlaqETS"},"execution_count":null,"outputs":[]}]}