import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.models import resnet50, resnet18, resnet34

class ProbabilisticModel(nn.Module):
    """ResNet-18 encoder with an embedding head and a concentration head.

    The encoder ``f`` is built from the children of torchvision's
    ``resnet18``: the ``conv1`` child is swapped for a 3x3, stride-1,
    padding-1 convolution, and every ``nn.MaxPool2d`` / ``nn.Linear`` child
    is left out. Two heads with the same layout
    (Linear -> BatchNorm1d -> ReLU -> Linear) read the flattened encoder
    output: ``g`` produces the embedding and ``kappa_head`` produces one
    scalar per sample (kappa, the concentration parameter).

    Args:
        feature_dim: Number of output features of the embedding head ``g``.
    """

    def __init__(self, feature_dim=128):
        super().__init__()

        # Both heads take 512 input features and use a 512-wide hidden layer.
        head_width = 512

        def make_head(out_dim):
            # Linear -> BatchNorm1d -> ReLU -> Linear, shared by both heads.
            return nn.Sequential(
                nn.Linear(head_width, head_width, bias=False),
                nn.BatchNorm1d(head_width),
                nn.ReLU(inplace=True),
                nn.Linear(head_width, out_dim, bias=True),
            )

        # Encoder: walk the ResNet-18 children in order, replacing the first
        # convolution and skipping the max-pool and linear layers.
        skipped_types = (nn.Linear, nn.MaxPool2d)
        encoder_layers = []
        for child_name, child in resnet18().named_children():
            if child_name == 'conv1':
                encoder_layers.append(
                    nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
                )
            elif not isinstance(child, skipped_types):
                encoder_layers.append(child)
        self.f = nn.Sequential(*encoder_layers)

        # Projection head producing the feature embedding.
        self.g = make_head(feature_dim)

        # Head producing a single kappa value per sample.
        # NOTE: no learnable normalization constant for the vMF distribution
        # is defined in this module.
        self.kappa_head = make_head(1)

    def forward(self, x):
        """Encode ``x`` and return the normalized embedding and kappa.

        Args:
            x: Input batch fed to the encoder; the first convolution expects
                3 input channels.

        Returns:
            A tuple ``(embedding, kappa)``. ``embedding`` is the output of
            ``g`` L2-normalized along the last dimension. ``kappa`` is the
            softplus of the ``kappa_head`` output with its trailing
            singleton dimension squeezed away.
        """
        encoded = self.f(x)
        # Collapse everything after the batch dimension into one feature axis.
        flat = torch.flatten(encoded, start_dim=1)

        embedding = F.normalize(self.g(flat), dim=-1)

        raw_kappa = self.kappa_head(flat).squeeze(-1)
        # Softplus maps the unconstrained head output to a non-negative value.
        concentration = F.softplus(raw_kappa)

        return embedding, concentration
