import os
import cv2
import pywt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
from metrics.base_metrics_class import calculate_metrics_for_train

from .base_detector import AbstractDetector
from detectors import DETECTOR
from networks import BACKBONE
from loss import LOSSFUNC

import logging
logger = logging.getLogger(__name__)


@DETECTOR.register_module(module_name='wavelet_4ch')
class Wavelet4ChDetector(AbstractDetector):
    """
    Detector that feeds a 4-channel input to a single backbone.

    The input is the RGB image concatenated, along the channel dimension,
    with a 1-channel wavelet-denoised grayscale version of the same image.
    """

    def __init__(self, config):
        super().__init__()
        self.config = config

        # Build a single backbone that accepts 4-channel input:
        # 3-channel RGB concatenated with 1-channel wavelet-denoised image.
        self.backbone = self.build_backbone(config)

        # Wavelet denoising extractor (operates on grayscale).
        self.wavelet_extractor = WaveletDenoiseExtractor(
            wavelet='db1',  # choose desired wavelet type
            level=3         # wavelet decomposition level
        )

        # Loss function.
        self.loss_func = self.build_loss(config)

        # Recorders for inference.
        self.prob, self.label = [], []
        self.correct, self.total = 0, 0

    def build_backbone(self, config):
        """
        Build a backbone model that can handle 4 input channels
        by adapting the pretrained 'conv1.weight'.

        Reads config['backbone_name'], config['backbone_config'] and
        config['pretrained'] (path to a checkpoint holding a state dict).

        Returns:
            The backbone with pretrained weights loaded (classification
            layers excluded) and `conv1` replaced by a 4-channel convolution.

        Raises:
            KeyError: if the checkpoint has no 'conv1.weight' entry.
        """
        backbone_class = BACKBONE[config['backbone_name']]
        model_config = config['backbone_config']
        backbone = backbone_class(model_config)

        # Load the pretrained state dict.
        # NOTE(review): torch.load unpickles the file; only point
        # config['pretrained'] at checkpoints from a trusted source.
        state_dict = torch.load(config['pretrained'], map_location='cpu')
        for name, weights in list(state_dict.items()):
            # Some models store "pointwise" conv as [out_chan, in_chan], so expand if needed
            if 'pointwise' in name and weights.ndim == 2:
                state_dict[name] = weights.unsqueeze(-1).unsqueeze(-1)

        # Remove the classification layer if it doesn't match
        state_dict = {
            k: v for k, v in state_dict.items()
            if 'fc' not in k and 'classifier' not in k
        }

        # Remove original conv1 weights (which were for 3-ch input); they are
        # kept aside to initialise the replacement layer below.
        conv1_data = state_dict.pop('conv1.weight')

        # Load the remaining partial weights
        missing, unexpected = backbone.load_state_dict(state_dict, strict=False)
        logger.info(
            f"Loaded pretrained model from {config['pretrained']}. "
            f"Missing: {missing}, Unexpected: {unexpected}"
        )

        # Now replace conv1 with a 4-channel version
        backbone.conv1 = nn.Conv2d(
            in_channels=4,
            out_channels=32,
            kernel_size=3,
            stride=2,
            padding=0,
            bias=False
        )

        # Initialize the new 4-ch conv1
        with torch.no_grad():
            # Average across the original input channels => shape [out,1,kH,kW]
            avg_conv1_data = conv1_data.mean(dim=1, keepdim=True)
            # Repeat for 4 channels => shape [out,4,kH,kW]
            backbone.conv1.weight.data = avg_conv1_data.repeat(1, 4, 1, 1)

        logger.info("Modified conv1 to accept 4 input channels.")
        return backbone

    def build_loss(self, config):
        """Instantiate the loss registered under config['loss_func']."""
        loss_class = LOSSFUNC[config['loss_func']]
        return loss_class()

    def features(self, data_dict: dict) -> torch.Tensor:
        """
        1) Extract the original RGB image (B,3,H,W).
        2) Extract the wavelet-denoised grayscale image (B,1,H,W).
        3) Concatenate along the channel dimension to form a 4-channel input.
        4) Pass the 4-channel tensor through the backbone to extract features.
        """
        # Follow the device the backbone actually lives on instead of guessing
        # from torch.cuda.is_available(): the model may be on CPU on a
        # CUDA-capable host, or on a non-default GPU.
        device = next(self.backbone.parameters()).device
        x_rgb = data_dict['image'].to(device)         # (B,3,H,W)
        x_wavelet = self.wavelet_extractor(x_rgb).to(device)  # (B,1,H,W)
        combined = torch.cat([x_rgb, x_wavelet], dim=1)  # (B,4,H,W)
        feats = self.backbone.features(combined)
        return feats

    def classifier(self, features: torch.Tensor) -> torch.Tensor:
        """Run the backbone's classifier head on the extracted features."""
        return self.backbone.classifier(features)

    def get_losses(self, data_dict: dict, pred_dict: dict) -> dict:
        """Compute the loss between pred_dict['cls'] and data_dict['label'].

        Returns:
            A dict with the single key 'overall'.
        """
        label = data_dict['label']
        pred = pred_dict['cls']
        loss = self.loss_func(pred, label)
        return {'overall': loss}

    def get_train_metrics(self, data_dict: dict, pred_dict: dict) -> dict:
        """Compute training metrics from detached labels and predictions.

        Returns:
            A dict with the keys 'acc', 'auc', 'eer' and 'ap'.
        """
        label = data_dict['label']
        pred = pred_dict['cls']
        auc, eer, acc, ap = calculate_metrics_for_train(label.detach(), pred.detach())
        return {'acc': acc, 'auc': auc, 'eer': eer, 'ap': ap}

    def forward(self, data_dict: dict, inference=False) -> dict:
        """
        Run feature extraction and classification on a batch.

        Args:
            data_dict: batch dict; 'image' is always read, 'label' is read
                only when `inference` is true.
            inference: when true, also accumulate probabilities, labels and
                correct/total counts on the instance.

        Returns:
            A dict with 'cls' (classifier output), 'prob' (softmax
            probability of class index 1) and 'feat' (backbone features).
        """
        feats = self.features(data_dict)
        pred = self.classifier(feats)
        prob = torch.softmax(pred, dim=1)[:, 1]
        pred_dict = {'cls': pred, 'prob': prob, 'feat': feats}

        if inference:
            self.prob.append(prob.detach().cpu().numpy())
            self.label.append(data_dict['label'].detach().cpu().numpy())
            _, prediction_class = torch.max(pred, 1)
            correct = (prediction_class == data_dict['label']).sum().item()
            self.correct += correct
            self.total += data_dict['label'].size(0)

        return pred_dict


class WaveletDenoiseExtractor(nn.Module):
    """
    Converts an RGB image into a single-channel grayscale image and applies
    wavelet denoising. The output is scaled to [-1,1] with shape (B,1,H,W).

    The denoising runs on the CPU through NumPy, so no gradient flows through
    this module.
    """
    def __init__(self, wavelet='db1', level=3):
        """
        Args:
            wavelet: wavelet name passed on to `wavelet_denoise`.
            level: decomposition level passed on to `wavelet_denoise`.
        """
        super(WaveletDenoiseExtractor, self).__init__()
        self.wavelet = wavelet
        self.level = level

    def forward(self, image_tensor: torch.Tensor) -> torch.Tensor:
        """
        Args:
            image_tensor: batch of images, (B,C,H,W); values are treated as
                lying in [-1,1] and are clipped after rescaling.

        Returns:
            Float32 tensor of shape (B,1,H,W) in [-1,1], placed on the same
            device as `image_tensor`.
        """
        # 1) Convert to grayscale (B,3,H,W) -> (B,1,H,W)
        gray_tensor = torch.mean(image_tensor, dim=1, keepdim=True)
        # 2) Move to CPU as NumPy: (B,1,H,W). detach() is required because
        #    .numpy() raises on tensors that are part of the autograd graph.
        gray_np = gray_tensor.detach().cpu().numpy()
        height, width = gray_np.shape[-2:]
        denoised_list = []
        for gray_2d in gray_np[:, 0]:
            # Convert from [-1,1] to [0,255] (quantised to 8 bits).
            gray_255 = ((gray_2d + 1) * 127.5).clip(0, 255).astype(np.uint8)
            gray_float = gray_255.astype(np.float32)
            # Apply wavelet denoising.
            denoised_2d = wavelet_denoise(gray_float, wavelet=self.wavelet, level=self.level)
            # The wavelet reconstruction can be larger than the input when
            # H or W is not divisible by 2**level; crop so the result can be
            # concatenated with the original image.
            denoised_2d = denoised_2d[:height, :width]
            denoised_list.append(np.clip(denoised_2d, 0, 255))
        # Stack into (B,H,W)
        denoised_array = np.stack(denoised_list, axis=0).astype(np.float32)
        # Convert to torch tensor and add channel dimension: (B,1,H,W)
        wavelet_tensor = torch.from_numpy(denoised_array).unsqueeze(1)
        # Rescale from [0,255] to [-1,1]
        wavelet_tensor = wavelet_tensor / 127.5 - 1.0
        return wavelet_tensor.to(image_tensor.device)


def wavelet_denoise(image: np.ndarray, wavelet='db1', level=3) -> np.ndarray:
    """
    Perform wavelet denoising on a single-channel image.

    The image is decomposed with a multilevel 2D wavelet transform, every
    detail coefficient is set to zero, and the image is reconstructed from
    the approximation coefficients alone.

    Args:
        image: array whose last two axes are the image rows and columns.
        wavelet: wavelet name understood by PyWavelets.
        level: number of decomposition levels.

    Returns:
        The reconstructed array, with the same spatial shape as `image`.
    """
    # Decompose image with pywt.
    coeffs = pywt.wavedec2(image, wavelet, level=level)
    # Zero out detail coefficients.
    new_coeffs = [coeffs[0]]
    new_coeffs.extend([
        tuple(np.zeros_like(detail) for detail in group) for group in coeffs[1:]
    ])
    # Reconstruct the image.
    denoised = pywt.waverec2(new_coeffs, wavelet)
    # The reconstruction can come out larger than the input when a dimension
    # is not divisible by 2**level (e.g. 299 -> 300), so crop back to the
    # input size.
    return denoised[..., :image.shape[-2], :image.shape[-1]]
