import copy
from omegaconf import DictConfig, OmegaConf
import hydra
import torch
from torch.utils.data import Dataset, DataLoader, Subset
import numpy as np
import os
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from pathlib import Path
from tqdm import tqdm as tqdm
import argparse
import logging
import json
import sys
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
import models

# Set the root logger level to INFO so the log.info calls below are emitted.
logging.basicConfig(level = logging.INFO)

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

def get_effective_dim(contexts, threshold=0.95):
    """Estimate how many principal components are needed to explain the data.

    Fits a full PCA on ``contexts`` and looks for the smallest number of
    leading components whose cumulative explained-variance ratio strictly
    exceeds ``threshold``.

    Args:
        contexts: Data passed unchanged to ``sklearn.decomposition.PCA.fit``
            (which expects a 2-D ``(n_samples, n_features)`` array-like).
        threshold: Fraction of the total variance that has to be exceeded.
            Defaults to 0.95.

    Returns:
        A ``(dim, dist)`` tuple, where ``n`` is the number of fitted
        components:

        * ``dim`` is the smallest ``k`` in ``[0, n - 1]`` for which the first
          ``k`` components explain more than ``threshold`` of the variance,
          or ``n`` when no such ``k`` exists.
        * ``dist`` maps each ``k`` in ``range(n)`` to the cumulative ratio
          explained by the first ``k`` components when ``k < dim`` and to
          ``0`` otherwise.
    """
    pca = PCA()
    # Only the fitted variance ratios are needed, so skip the projection
    # that fit_transform would additionally compute.
    pca.fit(contexts)
    ratios = pca.explained_variance_ratio_
    n_components = len(ratios)

    # prefix[k] is the ratio explained by the first k components
    # (prefix[0] == 0). A single cumulative sum replaces re-summing the
    # leading slice at every step.
    prefix = np.concatenate(
        (np.zeros(1, dtype=ratios.dtype), np.cumsum(ratios))
    )[:n_components]

    above = np.flatnonzero(prefix > threshold)
    dim = int(above[0]) if above.size else n_components

    # Indices at or beyond dim keep the placeholder 0.
    cumulative = prefix.tolist()
    dist = {
        k: (cumulative[k] if k < dim else 0)
        for k in range(n_components)
    }
    return dim, dist

@hydra.main(config_path="conf")
def main(cfg: DictConfig) -> None:
    """Compute the effective dimensionality of saved representations.

    Loads the array stored at ``cfg.exp.test_rep_path``, runs
    ``get_effective_dim`` on it and writes ``{"dim": <value>}`` to
    ``results.json`` inside ``cfg.exp.output_path`` (created if missing).

    The "output directory" (``cfg.exp.out_dir`` when present, otherwise the
    current working directory) is only logged here.
    """
    log.info("Run decoding on bottleneck features")
    log.info(OmegaConf.to_yaml(cfg, resolve=True))

    working_dir = os.getcwd()
    log.info(f'Working directory {working_dir}')
    out_dir = cfg.exp.out_dir if "out_dir" in cfg.exp else working_dir
    log.info(f'Output directory {out_dir}')

    # Load the stored representations and measure their dimensionality.
    with open(cfg.exp.test_rep_path, "rb") as rep_file:
        representations = np.load(rep_file)
    effective_dim, _ = get_effective_dim(representations)

    # Persist the result next to any other outputs of this experiment.
    results_dir = Path(cfg.exp.output_path)
    results_dir.mkdir(parents=True, exist_ok=True)
    with open(results_dir / "results.json", "w") as results_file:
        json.dump({"dim": effective_dim}, results_file)

# Run the Hydra-decorated entry point only when executed as a script.
if __name__=="__main__":
    main()
