import numpy as np
from typing import Dict
from config import FeedbackConfig

class HumanFeedbackModule:
    """Simulated human annotator that scores an answer from precomputed metrics.

    The score is a fixed weighted blend of the supplied metrics plus two
    zero-mean Gaussian noise terms. The spread of each noise term is
    ``1 - level`` for the configured expertise level and inter-annotator
    agreement respectively.
    """

    def __init__(self, config: FeedbackConfig):
        """Copy the human-annotator settings off ``config``."""
        self.expertise_level = config.human_expertise_level
        self.inter_annotator_agreement = config.human_inter_annotator_agreement

    def evaluate(self, question: str, answer: str, metrics: Dict[str, float]) -> float:
        """Return a noisy reward in [0, 1] for one question/answer pair.

        Args:
            question: Accepted but not read by this method.
            answer: Accepted but not read by this method.
            metrics: Must contain ``'factual_accuracy'``, ``'coherence_score'``
                and ``'hallucination_rate'``. ``coherence_score`` is divided
                by 5.0 before weighting (presumably a 0-5 scale; confirm
                against the metric producer).

        Returns:
            The weighted score plus noise, clipped to [0, 1].

        Raises:
            KeyError: If a required metric is missing.
        """
        base_reward = (
            metrics['factual_accuracy'] * 0.5 +
            metrics['coherence_score'] / 5.0 * 0.3 +
            (1 - metrics['hallucination_rate']) * 0.2
        )

        # np.random.normal rejects a negative scale with ValueError, so a
        # configured level above 1 is treated as "no noise" instead of
        # crashing in the middle of an evaluation run.
        expert_scale = max(0.0, 1 - self.expertise_level)
        agreement_scale = max(0.0, 1 - self.inter_annotator_agreement)

        # Two draws, in this order, so seeded runs stay reproducible.
        expert_noise = np.random.normal(0, expert_scale) * 0.1
        agreement_noise = np.random.normal(0, agreement_scale) * 0.05

        human_reward = base_reward + expert_noise + agreement_noise
        return np.clip(human_reward, 0, 1)

    def batch_evaluate(self, questions: list, answers: list, metrics_list: list) -> list:
        """Evaluate items pairwise; stops at the shortest of the three inputs."""
        return [self.evaluate(q, a, m) for q, a, m in zip(questions, answers, metrics_list)]

class AIFeedbackModule:
    """Simulated AI judge that scores an answer from precomputed metrics.

    The score is a fixed weighted blend of four metrics, shifted by a
    configured bias and scaled down when the calibration score falls below
    the configured uncertainty threshold.
    """

    def __init__(self, config: FeedbackConfig):
        """Copy the AI-judge settings off ``config``."""
        self.bias_level = config.ai_bias_level
        self.uncertainty_threshold = config.ai_uncertainty_threshold

    def evaluate(self, question: str, answer: str, metrics: Dict[str, float]) -> float:
        """Return the AI reward for one question/answer pair, clipped to [0, 1].

        ``question`` and ``answer`` are accepted but not read. ``metrics``
        must contain ``'factual_accuracy'``, ``'coherence_score'``,
        ``'hallucination_rate'`` and ``'calibration_score'``.
        """
        # Terms are read and summed in a fixed left-to-right order.
        accuracy_term = metrics['factual_accuracy'] * 0.4
        coherence_term = metrics['coherence_score'] / 5.0 * 0.25
        grounded_term = (1 - metrics['hallucination_rate']) * 0.25
        calibration_term = metrics['calibration_score'] * 0.1
        weighted = accuracy_term + coherence_term + grounded_term + calibration_term

        biased = weighted + self.bias_level

        # The 0.8 penalty is applied before clipping.
        poorly_calibrated = metrics['calibration_score'] < self.uncertainty_threshold
        scored = biased * 0.8 if poorly_calibrated else biased

        return np.clip(scored, 0, 1)

    def batch_evaluate(self, questions: list, answers: list, metrics_list: list) -> list:
        """Evaluate items pairwise; stops at the shortest of the three inputs."""
        return list(map(self.evaluate, questions, answers, metrics_list))

class UncertaintyEstimator:
    """Derive an uncertainty score in [0, 1] from a metrics dict.

    Attributes are limited to ``threshold``: ``should_mask`` reports True
    when the estimated uncertainty is strictly greater than it.
    """

    def __init__(self, threshold: float = 0.3):
        """Store the masking threshold (default 0.3)."""
        self.threshold = threshold

    def estimate_uncertainty(self, metrics: Dict[str, float]) -> float:
        """Return the mean of two uncertainty signals, clipped to [0, 1].

        The signals are ``1 - calibration_score`` and the absolute gap
        between ``factual_accuracy`` and ``1 - hallucination_rate``.

        Raises:
            KeyError: If any of the three metrics is missing.
        """
        calibration_uncertainty = 1 - metrics['calibration_score']
        consistency_uncertainty = abs(metrics['factual_accuracy'] - (1 - metrics['hallucination_rate']))

        overall_uncertainty = (calibration_uncertainty + consistency_uncertainty) / 2
        return np.clip(overall_uncertainty, 0, 1)

    def should_mask(self, metrics: Dict[str, float]) -> bool:
        """Return True when the estimated uncertainty exceeds the threshold."""
        # Comparing the NumPy scalar from np.clip yields numpy.bool_, which
        # fails identity checks against True/False and is not
        # JSON-serialisable; coerce to the built-in bool the signature promises.
        return bool(self.estimate_uncertainty(metrics) > self.threshold)

class RewardIntegrator:
    """Blend human and AI feedback into a single reward.

    Holds the two feedback modules it is given and creates its own
    ``UncertaintyEstimator`` with that class's default threshold.
    """

    def __init__(self, human_feedback: HumanFeedbackModule, ai_feedback: AIFeedbackModule):
        """Keep references to both feedback modules and build the estimator."""
        self.human_feedback = human_feedback
        self.ai_feedback = ai_feedback
        self.uncertainty_estimator = UncertaintyEstimator()

    def compute_hybrid_reward(self, question: str, answer: str, metrics: Dict[str, float], alpha: float) -> float:
        """Return ``alpha * human + (1 - alpha) * ai``, scaled by 0.7 when masked.

        The human module is evaluated before the AI module. The result is
        not clipped here.
        """
        human_score = self.human_feedback.evaluate(question, answer, metrics)
        ai_score = self.ai_feedback.evaluate(question, answer, metrics)

        blended = alpha * human_score + (1 - alpha) * ai_score

        if not self.uncertainty_estimator.should_mask(metrics):
            return blended

        # High-uncertainty samples get a 30% reduction.
        return blended * 0.7

    def batch_compute_hybrid_reward(self, questions: list, answers: list, 
                                  metrics_list: list, alphas: list) -> list:
        """Compute hybrid rewards item by item; stops at the shortest input."""
        return [
            self.compute_hybrid_reward(question, answer, metrics, weight)
            for question, answer, metrics, weight in zip(questions, answers, metrics_list, alphas)
        ]