In [1]:
# Uncomment the line below to install exlib.
# Use %pip (not !pip) so the package is installed into this kernel's environment.
# %pip install exlib
In [ ]:
# --- Imports: stdlib, third-party, then the local exlib checkout ---
import sys
import math
import argparse
from collections import namedtuple

import yaml
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
from datasets import load_dataset
from tqdm.auto import tqdm

# Make the local exlib source tree importable before importing from it.
sys.path.insert(0, "../../src")
import exlib
from exlib.datasets.supernova import SupernovaDataset, SupernovaClsModel, SupernovaFixScore, get_supernova_scores
# NOTE(review): wildcard imports kept because later cells rely on names they
# provide (e.g. create_test_dataloader, create_test_dataloader_raw); prefer
# explicit imports once the needed names are confirmed.
from exlib.datasets.supernova_helper import *
# Baselines
from exlib.features.time_series import *

# Run on GPU when available, otherwise fall back to CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
Overview¶
- The objective is to classify astronomical sources whose brightness varies over time (light curves) into one of 14 classes.
Load datasets and pre-trained models¶
In [3]:
# Load the held-out test split and the pre-trained classifier checkpoint.
test_dataset = SupernovaDataset(data_dir="anonymized-dataset", split="test")
model = SupernovaClsModel(model_path="anonymized-model")
num labels: 14 Using Fourier PE classifier dropout: 0.2
Model prediction¶
In [4]:
# Move the classifier onto the chosen device and build the evaluation loader.
model = model.to(device)

test_dataloader = create_test_dataloader(dataset=test_dataset, batch_size=5, compute_loss=True)
original dataset size: 792 remove nans dataset size: 792
In [5]:
# Model prediction on the held-out test split.
model.eval()  # disable dropout (classifier dropout is 0.2 per the model banner) for deterministic inference
with torch.no_grad():
    y_true = []
    y_pred = []
    for bi, batch in tqdm(enumerate(test_dataloader), total=len(test_dataloader)):
        # Drop the non-tensor object-id field and move everything else to the device.
        batch = {k: v.to(device) for k, v in batch.items() if k != "objid"}
        outputs = model(**batch)
        y_true.extend(batch['labels'].cpu().numpy())
        # argmax over the class dimension; squeeze(1) rather than squeeze() so a
        # final batch of size 1 still yields a 1-D array that extend() can iterate.
        # NOTE(review): assumes logits are (batch, 1, num_labels) — confirm against the model.
        y_pred.extend(torch.argmax(outputs.logits, dim=2).squeeze(1).cpu().numpy())

# (Removed an unused alignment_scores_all accumulator from the original cell.)
accuracy = sum(1 for t, p in zip(y_true, y_pred) if t == p) / len(y_true)
print(f"accuracy: {accuracy}")
100%|███████████████████████████████████████████████████████████████████████████████| 159/159 [00:01<00:00, 131.68it/s]
accuracy: 0.7967171717171717
Feature alignment¶
In [6]:
# Rebuild the loader in "raw" form for the feature-alignment evaluation below.
test_dataloader = create_test_dataloader_raw(
    dataset=test_dataset,
    batch_size=5,
    compute_loss=True,
)
original dataset size: 792 remove nans dataset size: 792
Baselines¶
- Identity
- Random
- 5 slices
- 10 slices
- 15 slices
- Clustering
- Archipelago
In [7]:
# Compute average alignment scores for each baseline feature-grouping method.
scores = get_supernova_scores(batch_size=5)
original dataset size: 792 remove nans dataset size: 792 num labels: 14 Using Fourier PE classifier dropout: 0.2
100%|████████████████████████████████████████████████████████████████████████████████| 159/159 [04:37<00:00, 1.74s/it]
Avg alignment of identity features: 0.0152 Avg alignment of random features: 0.0358 Avg alignment of 5 features: 0.0337 Avg alignment of 10 features: 0.0555 Avg alignment of 15 features: 0.0554 Avg alignment of clustering features: 0.2622 Avg alignment of archipelago features: 0.2563
In [ ]: