import os
from abc import ABC, abstractmethod
from typing import Dict, Optional

# from .evaluator import Evaluator

from langchain.evaluation import load_evaluator
from langchain_community.chat_models import MoonshotChat


class Evaluator(ABC):
    """Abstract interface for objects that score a model response.

    Concrete subclasses must implement ``evaluate_response``.
    """

    # Declared here, assigned by subclasses: a str -> str mapping
    # (presumably criterion name -> rubric text; confirm against subclasses).
    CRITERIA: dict[str, str]

    @abstractmethod
    def evaluate_response(self, response: str) -> int:
        """Score ``response`` and return the score as an integer."""
    
class KimiEvaluator(Evaluator):
    """Score a response against a reference answer using a Kimi (Moonshot) chat model.

    The model is wrapped in LangChain's ``labeled_score_string`` evaluator,
    which is given ``CRITERIA`` as the rubric, the reference answer and the
    question that was asked.

    The Moonshot API key is read from the ``MOONSHOT_API_KEY`` environment
    variable; it must never be committed to source control.
    """

    # Keyword arguments forwarded to ``MoonshotChat`` when the caller supplies none.
    DEFAULT_MODEL_KWARGS: Dict = dict(temperature=0)

    # Rubric handed to the LangChain evaluator as its ``criteria``.
    CRITERIA = {
        "accuracy": (
            "Score 1: The answer is completely unrelated to the reference.\n"
            "Score 3: The answer has minor relevance but does not align with the reference.\n"
            "Score 5: The answer has moderate relevance but contains inaccuracies.\n"
            "Score 7: The answer aligns with the reference but has minor omissions.\n"
            "Score 10: The answer is completely accurate and aligns perfectly with the reference.\n"
            "Only respond with a numerical score."
        )
    }

    def __init__(
        self,
        model_name: str = "moonshot-v1-8k",  # Kimi (Moonshot) model identifier
        model_kwargs: Dict = DEFAULT_MODEL_KWARGS,
        true_answer: Optional[str] = None,
        question_asked: Optional[str] = None,
    ):
        """Build the Moonshot chat client used for scoring.

        Args:
            model_name: Moonshot model passed to ``MoonshotChat`` as ``model``.
            model_kwargs: Extra keyword arguments forwarded to ``MoonshotChat``.
            true_answer: Reference answer that responses are compared against.
            question_asked: The question the response is supposed to answer.

        Raises:
            ValueError: If ``true_answer`` or ``question_asked`` is missing or
                empty, or if ``MOONSHOT_API_KEY`` is not set in the environment.
        """
        if not true_answer or not question_asked:
            raise ValueError("true_answer and question_asked must be supplied with init.")

        self.model_name = model_name
        # Copy so that mutating this instance's kwargs can never alter the
        # shared class-level DEFAULT_MODEL_KWARGS (or the caller's dict).
        self.model_kwargs = dict(model_kwargs)
        self.true_answer = true_answer
        self.question_asked = question_asked

        # Credentials come from the environment, never from a literal in the source.
        api_key = os.getenv("MOONSHOT_API_KEY")
        if not api_key:
            raise ValueError("MOONSHOT_API_KEY must be in env to use Kimi evaluator.")

        self.api_key = api_key

        self.evaluator = MoonshotChat(
            model=self.model_name,
            api_key=self.api_key,
            base_url="https://api.moonshot.cn/v1",  # official endpoint
            **self.model_kwargs,
        )

    def evaluate_response(self, response: str) -> int:
        """Score ``response`` against the stored reference answer.

        Args:
            response: The model output to be graded.

        Returns:
            The ``"score"`` entry of the LangChain evaluation result, converted
            with ``int()``.
        """
        evaluator = load_evaluator(
            "labeled_score_string",
            criteria=self.CRITERIA,
            llm=self.evaluator,
        )

        eval_result = evaluator.evaluate_strings(
            prediction=response,
            reference=self.true_answer,
            input=self.question_asked,
        )
        return int(eval_result["score"])