"""Concept-biased random policy implementation for the FRAME system.

This policy selects random actions with a bias towards concept-producing rules over
conjecture-producing rules: when both kinds of action are valid, a concept-producing action
is chosen 75% of the time; when only one kind is valid, that kind is always chosen.
"""

from typing import List, Dict, Any, Optional, Union
from collections import defaultdict

from frame.policies.base import Policy
from frame.knowledge_base.knowledge_graph import KnowledgeGraph
from frame.environments.math_env import ValidAction, MathEnv


class ConceptBiasedRandomPolicy(Policy):
    """
    Policy that randomly selects actions with a bias towards concept-producing rules.

    Valid actions are split into two groups according to the ``type`` attribute of the
    production rule they refer to: rules whose type is ``"Concept"``, and all other
    rules (treated as conjecture-producing). When both groups are non-empty, a concept
    action is chosen with probability ``concept_bias`` (0.75 by default); when only one
    group is non-empty, the action is always drawn from that group. The intent is to
    build up the concept space before generating conjectures.
    """

    # Class-level default so instances created without running __init__
    # still have a usable bias value.
    concept_bias: float = 0.75

    def __init__(self, *, concept_bias: float = 0.75, **kwargs):
        """
        Initialize the concept-biased random policy.

        Args:
            concept_bias: Probability of drawing from the concept-producing actions
                when both concept and conjecture actions are valid. Must lie in
                the closed interval [0, 1].
            **kwargs: Forwarded unchanged to the base ``Policy`` constructor.

        Raises:
            ValueError: If ``concept_bias`` is outside [0, 1] (including NaN).
        """
        if not 0.0 <= concept_bias <= 1.0:
            raise ValueError(
                f"concept_bias must be between 0 and 1, got {concept_bias!r}"
            )
        # Random policy requires enumerated actions
        super().__init__(requires_enumeration=True, **kwargs)
        self.concept_bias = concept_bias
        self.production_rules = None

    def set_production_rules(self, production_rules) -> None:
        """
        Set the production rules for the policy.

        Args:
            production_rules: List of production rules; ``select_action`` indexes it
                with each valid action's ``rule_idx``.
        """
        self.production_rules = production_rules

    def set_rules(self, rules) -> None:
        """
        Set the rules for the policy (compatibility method for TheoryBuilder).

        Args:
            rules: List of production rules
        """
        # Single source of truth: both setters store into the same attribute.
        self.set_production_rules(rules)

    def select_action(self, env: MathEnv) -> Optional[int]:
        """
        Select a random action with preference for concept-producing rules over
        conjecture-producing rules.

        Args:
            env: The math environment containing the current state; only its
                ``valid_actions`` attribute is read here.

        Returns:
            Index (into ``env.valid_actions``) of the selected action, or None if
            there are no valid actions or no production rules have been set.
        """
        valid_actions = env.valid_actions

        if not valid_actions or self.production_rules is None:
            return None

        # Partition action indices by the type of the rule each action refers to.
        concept_actions = []
        conjecture_actions = []
        for i, action in enumerate(valid_actions):
            rule = self.production_rules[action.rule_idx]
            if rule.type == "Concept":
                concept_actions.append(i)
            else:  # any non-"Concept" rule is treated as conjecture-producing
                conjecture_actions.append(i)

        # Every action lands in exactly one group, so at least one group is
        # non-empty here and no further fallback is needed. The biased draw is
        # only made when there is a real choice between the two groups, so the
        # random stream is consumed only when the draw affects the outcome.
        # NOTE(review): self.rng is presumably provided by the Policy base class
        # and must expose random() and choice() -- confirm against the base.
        if not conjecture_actions:
            pool = concept_actions
        elif not concept_actions:
            pool = conjecture_actions
        elif self.rng.random() < self.concept_bias:
            pool = concept_actions
        else:
            pool = conjecture_actions

        return int(self.rng.choice(pool))