In [1]:
# Uncomment line below to install exlib
# !pip install exlib
In [ ]:
import torch
import yaml
import argparse
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
import sys
sys.path.insert(0, "../../src")
import exlib
import math
import torch.nn.functional as F

from datasets import load_dataset
from collections import namedtuple
from exlib.datasets.supernova import SupernovaDataset, SupernovaClsModel, SupernovaFixScore, get_supernova_scores
from exlib.datasets.supernova_helper import *
from tqdm.auto import tqdm

# Baselines
from exlib.features.time_series import *
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

Overview¶

  • The objective is to classify astronomical sources that vary with time into one of 14 classes, based on their observed time series (light curves)

Load datasets and pre-trained models¶

In [3]:
test_dataset = SupernovaDataset(data_dir = "anonymized-dataset", split="test")
model = SupernovaClsModel(model_path = "anonymized-model")
num labels: 14
Using Fourier PE
classifier dropout: 0.2
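
As a quick sanity check, we can look at one test sample before running the model. The sketch below assumes SupernovaDataset items are dict-like (an assumption; the actual field names and structure may differ):

In [ ]:
# Sketch: inspect the first test sample (assumes dict-like items; adjust if the structure differs)
sample = test_dataset[0]
if isinstance(sample, dict):
    for k, v in sample.items():
        shape = getattr(v, "shape", None)
        print(f"{k}: shape={tuple(shape)}" if shape is not None else f"{k}: {v}")
else:
    print(type(sample), sample)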

Model prediction¶

In [4]:
model = model.to(device)
test_dataloader = create_test_dataloader(
    dataset=test_dataset,
    batch_size=5,
    compute_loss=True
)
original dataset size: 792
remove nans dataset size: 792
In [5]:
# model prediction
with torch.no_grad():
    y_true = []
    y_pred = []
    alignment_scores_all = []
    for bi, batch in tqdm(enumerate(test_dataloader), total=len(test_dataloader)):
        batch = {k: v.to(device) for k, v in batch.items() if k != "objid"}
        outputs = model(**batch)
        y_true.extend(batch['labels'].cpu().numpy())
        y_pred.extend(torch.argmax(outputs.logits, dim=2).squeeze().cpu().numpy())
# overall accuracy on the test set
print(f"accuracy: {sum([1 for i, j in zip(y_true, y_pred) if i == j]) / len(y_true)}")
100%|███████████████████████████████████████████████████████████████████████████████| 159/159 [00:01<00:00, 131.68it/s]
accuracy: 0.7967171717171717
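
Beyond the overall accuracy, a per-class breakdown is useful since there are 14 classes. The sketch below only reuses the y_true and y_pred lists collected above and numpy, which is already imported:

In [ ]:
# Sketch: per-class accuracy from the predictions collected above
y_true_arr = np.array(y_true)
y_pred_arr = np.array(y_pred)
for c in np.unique(y_true_arr):
    mask = y_true_arr == c
    print(f"class {c}: n={mask.sum()}, accuracy={(y_pred_arr[mask] == c).mean():.3f}")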

Feature alignment¶

In [6]:
test_dataloader = create_test_dataloader_raw(
    dataset=test_dataset,
    batch_size=5,
    compute_loss=True
)
original dataset size: 792
remove nans dataset size: 792

Baselines¶

  • Identity
  • Random
  • 5 slices (the slice baselines split the time axis into contiguous segments; see the sketch after this list)
  • 10 slices
  • 15 slices
  • Clustering
  • Archipelago
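
The slice baselines are the simplest grouping strategy: each time step is assigned to one of k contiguous, roughly equal-length segments along the time axis. The sketch below illustrates that idea only; it is not the exlib implementation:

In [ ]:
# Sketch (illustration only, not the exlib implementation): assign each time step
# to one of k contiguous, roughly equal-length slices along the time axis
def slice_group_ids(seq_len, k):
    return torch.div(torch.arange(seq_len) * k, seq_len, rounding_mode="floor")

print(slice_group_ids(seq_len=12, k=5))  # tensor([0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 4, 4])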
In [7]:
scores = get_supernova_scores(batch_size = 5)
original dataset size: 792
remove nans dataset size: 792
num labels: 14
Using Fourier PE
classifier dropout: 0.2
100%|████████████████████████████████████████████████████████████████████████████████| 159/159 [04:37<00:00,  1.74s/it]
Avg alignment of identity features: 0.0152
Avg alignment of random features: 0.0358
Avg alignment of 5 features: 0.0337
Avg alignment of 10 features: 0.0555
Avg alignment of 15 features: 0.0554
Avg alignment of clustering features: 0.2622
Avg alignment of archipelago features: 0.2563
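
The clustering and Archipelago groupings align substantially better with expert features than the identity, random, and fixed-slice baselines. To compare the methods visually, we can plot the averages; the structure of scores is an assumption here (treated as a mapping from baseline name to per-example alignment scores), so adjust the aggregation if get_supernova_scores returns something else:

In [ ]:
# Sketch: bar chart of average alignment per baseline
# (assumes `scores` maps baseline name -> per-example alignment scores)
avg_scores = {name: float(np.mean([float(s) for s in vals])) for name, vals in scores.items()}
plt.figure(figsize=(8, 4))
plt.bar(list(avg_scores.keys()), list(avg_scores.values()))
plt.ylabel("Avg alignment")
plt.xticks(rotation=45, ha="right")
plt.tight_layout()
plt.show()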
