import os
import pickle
import torch
import numpy as np
from collections import Counter
from sklearn.neighbors import NearestNeighbors

from llm_router.router.base_router import BaseRouter, RouterInput, RouterOutput, PREFERENCES
from llm_router.data.utils import get_costs

class KNNClassifierRouter(BaseRouter):
    """Router that picks a model by majority vote among nearest training neighbours.

    ``fit`` builds (or reloads) a cosine-distance nearest-neighbour index over
    the benchmark embeddings. ``route`` looks up the neighbours of a query
    embedding, determines for each neighbour which model maximises the
    cost-penalised score, and returns the model chosen most often.
    """

    @property
    def name(self):
        """Return the identifier of this router."""
        return "knn_classifier_router"

    def fit(self, trainset, valset, configs):
        """Build the nearest-neighbour index, reusing a cached one when it is valid.

        Args:
            trainset: Object whose ``get_benchmark()`` returns a mapping with
                ``"embeddings"``, ``"scores"`` and ``"costs"`` entries.
            valset: Unused by this router; kept for interface compatibility.
            configs: Configuration object; ``configs.training.output_dir`` is
                the directory where ``knn.pkl`` is read from / written to.
        """
        self.benchmark = trainset.get_benchmark()
        self.max_cost = self.benchmark["costs"].max().item()

        index_path = os.path.join(configs.training.output_dir, "knn.pkl")

        if os.path.exists(index_path):
            # NOTE(security): pickle.load can execute arbitrary code. Only
            # point output_dir at locations whose contents are trusted.
            with open(index_path, "rb") as f:
                cached = pickle.load(f)
            # route() uses the neighbour ids returned by the index to look up
            # rows of the benchmark, so an index fitted on a different number
            # of samples would yield out-of-range or mismatched rows. Reuse
            # the cache only when the sample counts agree; otherwise refit.
            n_rows = len(self.benchmark["scores"])
            if getattr(cached, "n_samples_fit_", None) == n_rows:
                self.knn = cached
                return

        # detach() so tensors that still track gradients can be converted.
        embeddings = self.benchmark["embeddings"].detach().cpu().numpy()
        self.knn = NearestNeighbors(
            n_neighbors=self.config.n_neighbors,
            metric="cosine",
            leaf_size=self.config.leaf_size,
            n_jobs=-1,
        ).fit(embeddings)

        os.makedirs(configs.training.output_dir, exist_ok=True)
        with open(index_path, "wb") as f:
            pickle.dump(self.knn, f)

    def route(self, router_input: RouterInput):
        """Select a model for one query by voting over its nearest neighbours.

        Args:
            router_input: Query to route. Its ``embedding`` is used for the
                neighbour search; the remaining fields are copied into the
                returned ``RouterOutput``.

        Returns:
            A single-entry dict mapping the (scaled) preference value to the
            ``RouterOutput`` describing the chosen model. ``info`` carries
            ``"max_cost"`` and the per-neighbour votes (``"neigh_routing"``).
        """
        # detach() so an embedding that still tracks gradients can be converted.
        query = router_input.embedding.detach().unsqueeze(0).cpu().numpy()
        _, indices = self.knn.kneighbors(query)
        neighbour_ids = indices[0]

        neigh_scores = torch.stack(
            [self.benchmark["scores"][idx] for idx in neighbour_ids]
        )  # [k, M]
        neigh_costs = torch.stack(
            [self.benchmark["costs"][idx] for idx in neighbour_ids]
        )  # [k, M]

        # NOTE(review): the cost term ends up divided by max_cost twice (once
        # here, once in the expression for q), and the scaled value is also
        # used as the key of the returned dict. This looks unintended, but it
        # is left as-is because changing it would alter routing decisions and
        # output keys that callers may rely on -- confirm with the owners.
        preference = self.config.preference / self.max_cost
        q = neigh_scores - preference * neigh_costs / self.max_cost

        # Each neighbour votes for the model with the best penalised score.
        neigh_routing = torch.argmax(q, dim=1).tolist()  # [k]
        # On ties the model that received its first vote earliest wins.
        routing_id = Counter(neigh_routing).most_common(1)[0][0]
        routing_model = list(router_input.routing_config.keys())[routing_id]

        return {
            preference: RouterOutput(
                idx=router_input.idx,
                routing_config=router_input.routing_config,
                scores=router_input.scores,
                costs=router_input.costs,
                input_tokens=router_input.input_tokens,
                output_tokens=router_input.output_tokens,
                routing_id=routing_id,
                routing_model=routing_model,
                info={
                    "max_cost": self.max_cost,
                    "neigh_routing": neigh_routing,
                },
            )
        }