"""Enhanced Interestingness-Guided Policy implementation for the FRAME system.

This policy can be configured to use different strategies for selecting concepts
and actions, allowing flexible exploration strategies for mathematical discovery.

Key Features:
- Flexible concept selection strategies: interestingness scoring or random
- Flexible action selection strategies: simulation-based, rule-based, or pure random
- Ability to configure each stage independently to create different policy behaviors
- Support for custom interestingness functions that operate on the knowledge graph

Examples of use:
```python
# Example 1: Create a policy using HR interestingness functions directly
hr_policy = InterestingnessGuidedPolicy(
    concept_selection=ConceptSelectionStrategy.INTERESTINGNESS,
    action_selection=ActionSelectionStrategy.SIMULATE_AND_SCORE,
    interestingness_scorer=HR_INTERESTINGNESS_FUNCTION,
    top_k_concepts=8,
    temperature=1.5
)

# Example 2: Create a policy that uses custom interestingness scoring
def my_interestingness_scorer(entity_id, graph):
    # Your custom logic to score an entity's interestingness
    return score

direct_policy = InterestingnessGuidedPolicy(
    concept_selection=ConceptSelectionStrategy.INTERESTINGNESS,
    action_selection=ActionSelectionStrategy.SIMULATE_AND_SCORE,
    interestingness_scorer=my_interestingness_scorer,
    top_k_concepts=5,
    action_selection_params={"simulation_limit": 20},
    temperature=1.2
)

# Example 3: Create a policy that loads the interestingness function from a file
file_policy = InterestingnessGuidedPolicy(
    concept_selection=ConceptSelectionStrategy.INTERESTINGNESS,
    action_selection=ActionSelectionStrategy.SIMULATE_AND_SCORE,
    interestingness_function_path="path/to/function.py",
    top_k_concepts=5,
    temperature=1.0
)
```

See the main function at the bottom of this file for more detailed usage examples.
"""

from typing import List, Dict, Any, Optional, Union, Callable, Tuple, Literal
import numpy as np
from collections import defaultdict
import logging
from enum import Enum, auto
import os
import importlib.util
import sys
import functools  # Added import for functools
import traceback
import copy  # Add import for deepcopy
import time

from frame.policies.base import Policy
from frame.knowledge_base.knowledge_graph import KnowledgeGraph, NodeType
from frame.environments.math_env import ValidAction, MathEnv
from frame.productions.base import ProductionRule
from frame.knowledge_base.entities import Concept, Conjecture, Theorem
from frame.knowledge_base.knowledge_graph import ConstructionStep
from frame.interestingness.learning.dsl_primitives import (
    HR_INTERESTINGNESS_FUNCTION,
)

# Initialize logger
logger = logging.getLogger(__name__)

# Define strategy types as enums
class ConceptSelectionStrategy(Enum):
    """How the policy chooses the concepts that seed candidate actions.

    Members are numbered consecutively from 1 in declaration order.
    """

    # Rank concepts with the configured interestingness function.
    INTERESTINGNESS = 1
    # Ignore scores and draw concepts at random.
    RANDOM = 2

class ActionSelectionStrategy(Enum):
    """How the policy picks one action out of the candidate actions.

    Members are numbered consecutively from 1 in declaration order.
    """

    # Simulate candidate actions and score the entities they would produce.
    SIMULATE_AND_SCORE = 1
    # Pick a rule at random, then pick randomly among that rule's actions.
    RULE_BASED_RANDOM = 2
    # Pick uniformly at random among all candidate actions.
    PURE_RANDOM = 3

class InterestingnessGuidedPolicy(Policy):
    """
    A flexible policy that uses interestingness in various ways to guide action selection.
    
    This policy can be configured to use different strategies for selecting
    concepts and actions. The policy operates in two main stages:
    
    1. Concept Selection: Identifies the most interesting concepts to use as inputs
       for possible actions. This stage uses one of two strategies:
       - INTERESTINGNESS: Uses a single scoring function to evaluate concepts
       - RANDOM: Selects concepts randomly
       
    2. Action Selection: Chooses the most promising action from valid candidates.
       This stage uses one of three strategies:
       - SIMULATE_AND_SCORE: Simulates each action's outcome and scores the result
       - RULE_BASED_RANDOM: First selects a rule randomly, then an action within that rule
       - PURE_RANDOM: Selects an action completely randomly
       
    By combining different strategies for each stage, this policy can be configured
    to behave in various ways - from completely random exploration to sophisticated
    simulation-based approaches or anything in between.
    """
    
    def __init__(self, 
                 rules: Optional[List[ProductionRule]] = None,
                 # Concept selection parameters 
                 concept_selection: ConceptSelectionStrategy = ConceptSelectionStrategy.INTERESTINGNESS,
                 interestingness_scorer: Optional[Callable[[str, KnowledgeGraph], float]] = None,
                 interestingness_function_path: Optional[str] = None,
                 top_k_concepts: int = 5,
                 # Action selection parameters
                 action_selection: ActionSelectionStrategy = ActionSelectionStrategy.SIMULATE_AND_SCORE,
                 action_selection_params: Optional[Dict[str, Any]] = None,
                 # Common parameters
                 temperature: float = 1.0,
                 # Compatible with Hydra's nested params structure
                 params: Optional[Dict[str, Any]] = None,
                 max_concept_name_length: int = 2000,
                 **kwargs):
        """
        Initialize the interestingness-guided policy.
        
        Args:
            rules: List of available production rules. ``None`` is treated as an empty list.
            
            # Concept selection parameters
            concept_selection: Strategy to use for sampling interesting concepts. May be given
                               as an enum member or as the member's name (case-insensitive).
            interestingness_scorer: A function that takes (entity_id, graph) and returns a score.
                                    Used when concept_selection=INTERESTINGNESS and no
                                    interestingness_function_path is given.
            interestingness_function_path: Path to a Python file containing an interestingness 
                                           function. If provided, the function is loaded from 
                                           this file and takes precedence over
                                           interestingness_scorer. If relative, it's resolved
                                           relative to the project root (three directory levels
                                           above this file).
            top_k_concepts: Number of top concepts to consider as inputs. Must be > 0.
            
            # Action selection parameters
            action_selection: Strategy for selecting actions. May be given as an enum member
                              or as the member's name (case-insensitive).
            action_selection_params: Dictionary of parameters specific to the action selection
                                    strategy. Supported parameters include:
                                    - 'simulation_limit': Maximum actions to simulate 
                                      (for SIMULATE_AND_SCORE); defaults to 3.
            
            # Common parameters
            temperature: Temperature for sampling (higher = more uniform).
            params: Optional dictionary of parameters used with Hydra configs.
                    If provided, parameters from this dict will override direct parameters;
                    keys that are not parameters of this class are forwarded to the base
                    class through kwargs.
            max_concept_name_length: Maximum length of concept names for filtering
            **kwargs: Additional parameters, forwarded to the base class constructor.
            
        Raises:
            ValueError: If top_k_concepts is not positive, or if the INTERESTINGNESS strategy
                        is selected without a scorer or a function path.
            KeyError: If a strategy is given as a string that names no enum member.
            FileNotFoundError: If interestingness_function_path does not resolve to an
                               existing file.
        """
        # Determine project root based on the location of this file
        # Assumes structure: PROJECT_ROOT/frame/policies/interestingness_guided_policy.py
        policy_file_path = os.path.abspath(__file__)
        project_root = os.path.dirname(os.path.dirname(os.path.dirname(policy_file_path)))
        logger.debug(f"Determined project root for resolving paths: {project_root}")

        # Handle params from Hydra config if provided
        if params is not None:
            # Override with values from params dict if provided
            if 'concept_selection' in params:
                concept_selection = params['concept_selection']
            if 'interestingness_scorer' in params:
                interestingness_scorer = params['interestingness_scorer']
            if 'interestingness_function_path' in params:
                interestingness_function_path = params['interestingness_function_path']
            if 'top_k_concepts' in params:
                top_k_concepts = params['top_k_concepts']
            if 'action_selection' in params:
                action_selection = params['action_selection']
            if 'action_selection_params' in params:
                action_selection_params = params['action_selection_params']
            if 'temperature' in params:
                temperature = params['temperature']
            if 'max_concept_name_length' in params:
                max_concept_name_length = params['max_concept_name_length']
                
            # Merge any additional params into kwargs so they reach the base class
            known_keys = ('concept_selection', 'interestingness_scorer', 'interestingness_function_path',
                          'top_k_concepts', 'action_selection', 'action_selection_params', 
                          'temperature', 'max_concept_name_length')
            for k, v in params.items():
                if k not in known_keys:
                    kwargs[k] = v
            logger.info(f"top_k_concepts: {top_k_concepts}")
            logger.info(f"temperature: {temperature}")
            logger.info(f"action_selection: {action_selection}")
            logger.info(f"concept_selection: {concept_selection}")
        
        # Convert string enum values to Enum members if needed.
        # Member names are all upper-case, so normalising the case keeps every
        # previously valid spelling working while also accepting e.g. "random".
        if isinstance(concept_selection, str):
            concept_selection = ConceptSelectionStrategy[concept_selection.strip().upper()]
        if isinstance(action_selection, str):
            action_selection = ActionSelectionStrategy[action_selection.strip().upper()]
        
        # This policy does not require full action enumeration
        super().__init__(requires_enumeration=False, **kwargs)
        self.rules = rules or []
        
        # Set concept selection strategy
        self.concept_selection = concept_selection
        
        # Common parameter validation (an explicit raise survives `python -O`,
        # unlike an assert)
        if not top_k_concepts > 0:
            raise ValueError("top_k_concepts must be greater than 0")
        self.top_k_concepts = top_k_concepts
        
        # Set temperature
        self.temperature = temperature
        self.max_concept_name_length = max_concept_name_length
        
        # Set action selection strategy
        self.action_selection = action_selection
        
        # Simulation limit for the SIMULATE_AND_SCORE strategy; configurable via
        # action_selection_params['simulation_limit'], defaulting to 3.
        action_selection_params = action_selection_params or {}
        self.simulation_limit = action_selection_params.get('simulation_limit', 3)
        self.action_selection_params = action_selection_params
        
        # We'll store the current graph for interestingness scoring
        self._current_graph = None
        
        # Configure internal scoring function based on concept selection strategy
        if concept_selection == ConceptSelectionStrategy.INTERESTINGNESS:
            # Load interestingness function from file if path is provided
            if interestingness_function_path:
                if interestingness_scorer is not None:
                    logger.warning("Both interestingness_function_path and interestingness_scorer were provided; "
                                   "the function loaded from the file takes precedence.")

                # Check if the provided path is already absolute
                if os.path.isabs(interestingness_function_path):
                    absolute_function_path = interestingness_function_path
                    logger.info(f"Loading interestingness function from provided absolute path: {absolute_function_path}")
                else:
                    # Resolve the relative path based on the project root
                    absolute_function_path = os.path.join(project_root, interestingness_function_path)
                    # Normalize the path (e.g., handle .. components, ensures consistent format)
                    absolute_function_path = os.path.normpath(absolute_function_path)
                    logger.info(f"Resolved relative path '{interestingness_function_path}' to absolute path: {absolute_function_path} (relative to project root: {project_root})")

                # Ensure the resolved path exists before loading
                if not os.path.exists(absolute_function_path):
                    raise FileNotFoundError(f"Resolved interestingness function file not found: {absolute_function_path} (original path: {interestingness_function_path}, project root: {project_root})")

                self._score_function = self._load_function_from_file(absolute_function_path)
                logger.info(f"Using interestingness scorer loaded from file: {absolute_function_path}")
            # Otherwise check for direct interestingness_scorer parameter
            elif interestingness_scorer is None:
                raise ValueError("For INTERESTINGNESS strategy, either interestingness_scorer or interestingness_function_path must be provided")
            else:
                self._score_function = interestingness_scorer
                logger.info(f"Using directly provided interestingness scorer: {getattr(interestingness_scorer, '__name__', 'Unnamed Scorer')}")
        else:
            # For RANDOM concept selection, still need a default function.
            # This is an expected configuration, so it is reported at INFO level.
            logger.info(f"Concept selection strategy is {concept_selection.name}. "
                        f"Using default (e.g., all-zeros) internal interestingness scorer, as this strategy "
                        f"does not rely on explicit interestingness scores for concept selection.")
            self._score_function = lambda entity_id, graph: 0.0  # Default to zero
        
        # Public alias of the scorer selected above
        self.interestingness_scorer = self._score_function
        
        # Initialize action history
        self.action_history = []
    
    def _get_concept_display_name(self, concept_id: str, graph: KnowledgeGraph) -> str:
        """Gets the display name of a concept (name attribute or ID), raises KeyError if not found."""
        if concept_id not in graph.nodes:
            raise KeyError(f"Concept ID '{concept_id}' not found in the graph when trying to get its display name.")
        
        entity_data = graph.nodes[concept_id]

        return entity_data['entity'].name # Fallback to ID if no name attribute or name is empty

    def _load_function_from_file(self, file_path: str) -> Callable[[str, KnowledgeGraph], float]:
        """
        Load an interestingness function from a Python file.
        
        The file should contain a function named 'calculate_interestingness' that 
        takes two parameters:
        - entity_id: str (the ID of the entity to score)
        - graph: KnowledgeGraph (the knowledge graph)
        
        and returns a float value representing the interestingness score.
        
        Args:
            file_path: Path to the Python file containing the function
            
        Returns:
            A callable function that takes entity_id and graph parameters
            
        Raises:
            FileNotFoundError: If the file doesn't exist
            ImportError: If the module couldn't be imported
            AttributeError: If the function isn't found in the module
            ValueError: If the function has an invalid signature
        """
        # Make sure the file exists
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"Interestingness function file not found: {file_path}")
        
        try:
            # Get the directory and filename
            directory, filename = os.path.split(file_path)
            module_name = os.path.splitext(filename)[0]
            
            # Set up the spec
            spec = importlib.util.spec_from_file_location(module_name, file_path)
            if spec is None:
                raise ImportError(f"Failed to create module spec from {file_path}")
                
            # Load the module
            module = importlib.util.module_from_spec(spec)
            sys.modules[module_name] = module
            spec.loader.exec_module(module)
            
            # Check for calculate_interestingness function
            if not hasattr(module, 'calculate_interestingness'):
                raise AttributeError(
                    f"Module {module_name} doesn't contain a 'calculate_interestingness' function. "
                    f"Available functions: {[f for f in dir(module) if callable(getattr(module, f)) and not f.startswith('__')]}"
                )
            
            function = module.calculate_interestingness
            
            # Test that the function has the right signature (entity_id, graph)
            import inspect
            sig = inspect.signature(function)
            param_count = len(sig.parameters)
            
            if param_count < 2:
                raise ValueError(
                    f"The calculate_interestingness function should accept at least 2 parameters: "
                    f"entity_id and graph. Found {param_count} parameters: {list(sig.parameters.keys())}"
                )
            
            # Verify the function works by testing it
            test_graph = KnowledgeGraph()  # Create an empty test graph
            test_entity_id = "concept_999999"  # A dummy entity ID that follows the expected pattern
            
            try:
                # Add a dummy node to the graph for testing
                test_graph.add_node(test_entity_id, entity=None, node_type=NodeType.CONCEPT)
                
                # Try calling the function with the test parameters
                result = function(test_entity_id, test_graph)
                
                # Check that the result is a number
                if not isinstance(result, (int, float)):
                    raise TypeError(f"Function returned {type(result).__name__}, expected a number")
                    
                # Verify that the result is finite
                if not np.isfinite(result):
                    raise ValueError(f"Function returned {result}, which is not a finite number")
                    
                logger.info(f"Successfully loaded and validated interestingness function from {file_path}. This will be used as the policy's scorer.")
                
                # Return the function
                return function
            finally:
                # Clean up the test graph
                if test_entity_id in test_graph.nodes:
                    test_graph.remove_node(test_entity_id)
                
        except Exception as e:
            logger.error(f"Error loading interestingness function from {file_path}: {e}")
            
            # For debugging, show the traceback
            logger.debug(traceback.format_exc())
            
            # Log a more helpful message
            logger.error(
                "Make sure the file contains a function named 'calculate_interestingness' that takes "
                "entity_id and graph parameters and returns a number."
            )
            
            # Re-raise the exception
            raise

    def set_rules(self, rules: List[ProductionRule]) -> None:
        """Replace the policy's production rules with ``rules`` and log how many were set.

        Args:
            rules: The production rules to store on the policy.
        """
        self.rules = rules
        rule_count = len(rules)
        logger.info(f"Set {rule_count} production rules")
    
    def set_interestingness_scorer(self, scorer: Callable[[str, KnowledgeGraph], float]) -> None:
        """Update the interestingness scorer function used with INTERESTINGNESS strategy.

        The request is ignored, with a warning, when the policy's concept
        selection strategy is not INTERESTINGNESS.

        Args:
            scorer: Function taking (entity_id, graph) and returning a score.
        """
        if self.concept_selection != ConceptSelectionStrategy.INTERESTINGNESS:
            logger.warning("Ignoring interestingness_scorer as concept_selection strategy is not INTERESTINGNESS")
            return

        self._score_function = scorer
        # The public alias holds a direct reference to the scorer rather than a
        # wrapper, so it has to be rebound too or it keeps pointing at the old one.
        self.interestingness_scorer = scorer
        logger.info("Updated direct interestingness scorer")
    
    def select_action(self, env: MathEnv) -> Optional[ValidAction]:
        """
        Generate an action using the configured strategies.
        
        The method proceeds in four steps:
        
        1. Drop concepts whose display name is longer than
           ``max_concept_name_length`` (or whose name lookup raises KeyError),
           then sample concepts with the configured concept selection strategy.
        2. Ask the environment for the valid actions over the sampled concepts.
        3. Filter out already-applied actions and actions exceeding the
           parameter-size, input-arity and construction-depth limits below.
        4. Pick one remaining action with the configured action selection
           strategy. For SIMULATE_AND_SCORE, a random candidate is used when
           the simulation returns no action.
        
        Different concept and action selection strategies can be combined
        to create various behaviors:
        
        1. Using INTERESTINGNESS + SIMULATE_AND_SCORE: 
           - Score concepts directly with the scorer
           - Simulate actions and score resulting entities
           
        2. Using RANDOM + RULE_BASED_RANDOM:
           - Sample concepts randomly
           - Select rules randomly, then actions within rules
           
        Args:
            env: The math environment containing the current state.
            
        Returns:
            A ValidAction object, or None if there are no rules, no usable
            concepts, no remaining candidate actions, or if concept sampling or
            valid-action computation raised a non-TypeError exception (which is
            logged and swallowed).
            
        Raises:
            TypeError: Re-raised when raised during concept sampling or during
                action selection.
        """
        graph = env.graph
        
        # Store current graph for interestingness scoring during printing
        self._current_graph = graph
        
        if not self.rules:
            logger.warning("No rules available.")
            return None
        
        # 1. Sample interesting concepts based on the selected strategy
        all_concepts_in_graph = graph.get_all_concepts()
        if not all_concepts_in_graph:
            logger.info("No concepts in the graph.")
            return None

        # Filter concepts by name length before sampling
        pre_filtered_concepts = []
        for concept_id in all_concepts_in_graph:
            try:
                display_name = self._get_concept_display_name(concept_id, graph)
                if len(display_name) <= self.max_concept_name_length:
                    pre_filtered_concepts.append(concept_id)
                else:
                    logger.debug(f"Concept '{display_name}' (ID: {concept_id}) excluded due to name length: {len(display_name)} > {self.max_concept_name_length}")
            except KeyError as e:
                # Only KeyError is handled here; any other exception from the
                # name lookup propagates to the caller.
                logger.error(f"Error getting display name for concept ID '{concept_id}': {e}. This concept will be excluded.")
                # Optionally re-raise if this should halt the process: raise
        
        if not pre_filtered_concepts:
            logger.warning("No concepts remaining after filtering by name length.")
            return None
        
        # Note: the count logged here also includes concepts dropped because
        # their name lookup raised KeyError, not only over-long names.
        logger.info(f"Filtered {len(all_concepts_in_graph) - len(pre_filtered_concepts)} concepts due to name length. {len(pre_filtered_concepts)} concepts remaining for sampling.")

        # Sample concepts using the selected strategy from the pre_filtered_concepts
        try:
            sampled_concepts_ids, sampled_concepts_scores = self._sample_concepts(graph, pre_filtered_concepts)
            if not sampled_concepts_ids:
                logger.info("Could not sample any interesting concepts from the pre-filtered list.")
                return None
            else:
                # Log the sampled concepts and their scores
                log_message = f"Sampled {len(sampled_concepts_ids)} concepts for action generation:"
                for i, sc_id in enumerate(sampled_concepts_ids):
                    try:
                        sc_entity, _, _ = graph.get_node(sc_id)
                        name_to_log = sc_entity.name if hasattr(sc_entity, 'name') else sc_id
                        score_to_log = sampled_concepts_scores[i]
                        log_message += f"\n  - {name_to_log} (ID: {sc_id}), Score: {score_to_log:.4f}"
                    except KeyError:
                        log_message += f"\n  - {sc_id} (not found in graph), Score: N/A"
                logger.info(log_message)

        except TypeError as te:
            # If we get type errors from the interestingness function, log and reraise
            logger.error(f"Type error in interestingness function: {te}")
            raise
        except Exception as e:
            # Best effort: any other sampling failure is logged and reported
            # to the caller as "no action".
            logger.error(f"Error sampling concepts: {e}")
            return None

        # 2. Generate candidate actions involving these concepts
        try:
            # Only consider actions derived from the sampled interesting concepts
            candidate_actions = env._compute_valid_actions(
                concepts=sampled_concepts_ids, conjectures=[]
            ) 
        except Exception as e:
            logger.error(f"Error computing valid actions: {e}")
            return None

        # 3. Filter out
        # a) actions already applied
        # b) actions whose extra parameters are too large (checked per argument name)
        # c) actions with any input node whose entity has input arity above 6
        # d) actions with any input node whose construction depth is 20 or more
        # These filters run outside any try block, so exceptions raised here propagate.
        candidate_actions = [action for action in candidate_actions 
                             if action not in env.applied_actions]
        # Per-parameter size limits enforced below (a missing parameter counts as empty):
        # shared_vars <= 2, indices_to_quantify <= 5, output_to_input_map <= 1,
        # indices_to_match <= 3, indices_to_map <= 3
        candidate_actions = [action for action in candidate_actions
                             if len(action.params.get('shared_vars', [])) <= 2 and 
                                # len(action.params.get('index_to_specialize', [])) <= 1 and # Note: Erroneous
                                len(action.params.get('indices_to_quantify', [])) <= 5 and # note: this 5 could be tightened to 3 when it is not a forall *conjecture*
                                len(action.params.get('output_to_input_map', [])) <= 1 and 
                                len(action.params.get('indices_to_match', [])) <= 3 and 
                                len(action.params.get('indices_to_map', [])) <= 3]
        candidate_actions = [action for action in candidate_actions 
                             if all(env.graph.nodes[node_id]['entity'].get_input_arity() <= 6 for node_id in action.input_nodes)]
        candidate_actions = [action for action in candidate_actions 
                             if all(env.graph.construction_depth(node_id) < 20 for node_id in action.input_nodes)]

        if not candidate_actions:
            logger.info("No new valid actions found from sampled concepts.")
            return None
            
        # 4. Select an action based on the configured strategy
        try:
            if self.action_selection == ActionSelectionStrategy.SIMULATE_AND_SCORE:
                returned_action = self._select_action_by_simulation(env, candidate_actions)
                if returned_action is not None:
                    # logger.info(f"Selected action: {returned_action}")
                    return returned_action
                else:
                    # Simulation produced nothing: fall back to a random candidate.
                    # NOTE(review): assumes self.rng.randint excludes the upper bound
                    # (NumPy RandomState semantics) — confirm against the base Policy.
                    return candidate_actions[self.rng.randint(0, len(candidate_actions))]
            elif self.action_selection == ActionSelectionStrategy.RULE_BASED_RANDOM:
                return self._select_action_by_rule_random(env, candidate_actions)
            
            else:  # ActionSelectionStrategy.PURE_RANDOM
                return candidate_actions[self.rng.randint(0, len(candidate_actions))]
        except TypeError as te:
            # If we get type errors from the interestingness function, log and reraise
            logger.error(f"Type error in interestingness function during action selection: {te}")
            raise
    
    def _sample_concepts(self, graph: KnowledgeGraph, concepts: List[str]) -> Tuple[List[str], List[float]]:
        """
        Sample concepts based on the selected strategy.
        
        Args:
            graph: The knowledge graph.
            concepts: List of concept IDs to sample from.
            
        Returns:
            A tuple containing the sampled concept IDs and their scores.
        """
        if not concepts:
            raise ValueError("No concepts to sample from.")
            
        # Store the graph for scoring
        self._current_graph = graph
        
        # Use sampling strategy based on configuration
        if self.concept_selection == ConceptSelectionStrategy.INTERESTINGNESS:
            return self._sample_concepts_interesting(graph, concepts, self.top_k_concepts)
        else:  # ConceptSelectionStrategy.RANDOM
            return self._sample_concepts_random(concepts, self.top_k_concepts)
    
    def _sample_concepts_random(self, concepts: List[str], k: int) -> Tuple[List[str], List[float]]:
        """
        Sample k concepts randomly.
        
        Args:
            concepts: List of concept IDs to choose from
            k: Number of concepts to sample
            
        Returns:
            A tuple containing the sampled concept IDs and their scores.
        """
        # Limit k to the number of available concepts
        k = min(k, len(concepts))
        
        # Return random sample using self.rng instead of np.random
        sampled_concepts = self.rng.choice(concepts, size=k, replace=False).tolist()
        sampled_scores = [self._score_function(concept_id, graph) for concept_id in sampled_concepts]
        
        return sampled_concepts, sampled_scores
    
    def _sample_concepts_interesting(self, graph: KnowledgeGraph, concepts: List[str], k: int) -> Tuple[List[str], List[float]]:
        """
        Sample concepts directly using the interestingness score function.
        
        This method calculates interestingness scores for each concept using the
        provided scorer function, and samples probabilistically based on these scores.
        
        Args:
            graph: The current knowledge graph
            concepts: List of concept IDs to choose from
            k: Number of concepts to sample
            
        Returns:
            A tuple containing the sampled concept IDs and their scores.
            
        Raises:
            TypeError: If the scoring function returns a non-numeric value
            ValueError: If scores are all identical or all zero
        """
        k = min(k, len(concepts))
        
        # Score all concepts
        scores = {}
        for concept_id in concepts:
            try:
                score = self._score_function(concept_id, graph)

                # Validate score type
                if not isinstance(score, (int, float)):
                    raise TypeError(f"Interestingness scorer returned non-numeric score: {score}")

                scores[concept_id] = max(0.0, float(score))  # Ensure score is non-negative
            except Exception as e:
                logger.warning(f"Error scoring concept {concept_id}: {e}")
                scores[concept_id] = 0.0  # Use 0.0 for failed scores

        # --- Log Top Concepts and Scores --- 
        if scores: # Only log if we actually scored something
            try: # Add try block to handle potential errors during logging
                # Sort concepts by score (descending)
                sorted_concepts = sorted(scores.items(), key=lambda item: item[1], reverse=True)
                # Get top k (or fewer if less than k total concepts)
                top_k_to_log = min(k, len(sorted_concepts))
                top_concepts_log = sorted_concepts[:top_k_to_log]

                log_message = f"Top {top_k_to_log} concepts based on interestingness score (before sampling):\n"
                for cid, score_val in top_concepts_log:
                    entity_name = "(Error getting name)" # Default name
                    if cid in graph.nodes and 'entity' in graph.nodes[cid] and hasattr(graph.nodes[cid]['entity'], 'name'):
                        entity_name = graph.nodes[cid]['entity'].name
                    elif cid in graph.nodes:
                         entity_name = cid # Fallback to ID if name attribute missing
                    else:
                         entity_name = f"(ID: {cid} - Not in graph?)"
                         
                    log_message += f"  - {entity_name} (ID: {cid}): {score_val:.4f}\n"
                logger.info(log_message.strip())
            except Exception as log_e:
                logger.error(f"Error occurred during logging of top concepts: {log_e}", exc_info=True)
        else:
            logger.warning("No concepts were successfully scored.")
        # --- End Log --- 

        # Apply softmax with temperature to get sampling probabilities
        def softmax(values, temp):
            # Ensure temp is not zero or negative to avoid division errors or invalid results
            if temp <= 0:
                logger.warning(f"Temperature ({temp}) is non-positive. Using temp=1.0 instead.")
                temp = 1.0
                
            exp_values = [np.exp(val / temp) for val in values]
            sum_exp = sum(exp_values)

            if sum_exp == 0:  # Handle all-zero case
                return [1.0 / len(values)] * len(values)
            return [val / sum_exp for val in exp_values]
        
        # Get concepts and scores as lists
        concept_list = list(scores.keys())
        score_list = [scores[c] for c in concept_list]
        
        # Calculate probabilities with softmax
        probs = softmax(score_list, self.temperature)
        
        # Sample concepts based on probabilities - use self.rng instead of np.random
        sampled_indices = self.rng.choice(
            range(len(concept_list)), 
            size=k, 
            replace=False, 
            p=probs
        )
        
        # Get the sampled concepts
        sampled_concepts = [concept_list[i] for i in sampled_indices]
        
        # Get the scores for the sampled concepts
        sampled_scores = [score_list[i] for i in sampled_indices]
        
        return sampled_concepts, sampled_scores
        
    def _select_action_by_simulation(self, env: MathEnv, candidate_actions: List[ValidAction]) -> Optional[ValidAction]:
        """
        Select an action by simulating outcomes and scoring resulting entities.

        This strategy:
        1. Subsamples candidate actions if there are too many (based on simulation_limit)
        2. Creates deep copies of the environment for each action to simulate.
        3. Executes simulations in parallel using ThreadPoolExecutor.
        4. In each simulation, calls env.step(action) on the copied environment.
        5. Scores the resulting new entity using the configured interestingness approach.
        6. Samples an action probabilistically based on the scores using temperature.

        Args:
            env: The math environment.
            candidate_actions: List of valid actions to consider.

        Returns:
            Selected action or None if no action could be selected.

        Raises:
            TypeError: If the interestingness function returns non-numeric values consistently.
        """
        logger.info(f"Using 'SIMULATE_AND_SCORE' action selection strategy with temperature: {self.temperature:.2f}")
        graph = env.graph # Keep original graph reference if needed elsewhere, but simulation uses copied env's graph

        if not candidate_actions:
            logger.warning("No candidate actions provided to _select_action_by_simulation.")
            return None

        if len(candidate_actions) <= self.simulation_limit:
            actions_to_simulate = candidate_actions
            logger.debug(f"Using all {len(candidate_actions)} candidate actions for simulation (limit: {self.simulation_limit}).")
        else:
            logger.info(f"Sampling {self.simulation_limit} actions from {len(candidate_actions)} candidates for simulation (rule-first uniform sampling).")
            actions_to_simulate = []
            
            # Group candidate actions by rule index
            grouped_by_rule = defaultdict(list)
            for action in candidate_actions:
                grouped_by_rule[action.rule_idx].append(action)
            
            available_rule_indices = list(grouped_by_rule.keys())

            if not available_rule_indices:
                logger.warning("No rules have candidate actions for simulation sampling.")
                # Fallback: if somehow candidate_actions was not empty but groups are, use original candidates up to limit
                actions_to_simulate = candidate_actions[:self.simulation_limit]
            else:
                for _ in range(self.simulation_limit):
                    # Uniformly select a rule index from those that have actions
                    selected_rule_idx = self.rng.choice(available_rule_indices)
                    
                    # Uniformly select an action from that rule's list of actions
                    selected_action = self.rng.choice(grouped_by_rule[selected_rule_idx])
                    actions_to_simulate.append(selected_action)
            
            logger.debug(f"Selected {len(actions_to_simulate)} actions for simulation using rule-first uniform sampling.")

        simulated_results = []  # List of (action, score)
        has_type_error = False  # Keep track of type errors during scoring
        type_error_message = ""

        # Inner function to perform simulation for a single action
        def _simulate_single_action(action: ValidAction) -> Tuple[ValidAction, Optional[float]]:
            nonlocal has_type_error, type_error_message # Allow modifying outer scope variables
            try:
                # Create a deep copy of the environment for this simulation
                copied_env = copy.deepcopy(env)
            except copy.Error as ce:
                logger.warning(f"Could not deepcopy environment for action {action}: {ce}")
                return action, None
            except Exception as e: # Catch other potential deepcopy errors
                logger.warning(f"Unexpected error during deepcopy for action {action}: {e}")
                return action, None

            try:
                # Step the copied environment
                # The environment's step method should handle applying the action and updating its internal graph
                obs, reward, done, truncated, info = copied_env.step(action, is_simulation=True)

                new_entity_ids = info.get("new_entities") # assuming only one new entity is made, which is true as of (4/26)

                if new_entity_ids is None or len(info.get("new_entities")) == 0:
                    # The action might not have produced a new entity (e.g., duplicate, rule failed)
                    logger.debug(f"Simulated Action {action} did not produce a new entity ID.")
                    return action, None # Cannot score if no new entity ID

                # Score the new entity using the *copied* environment's graph state
                try:
                    # Use the actual entity ID and the *hypothetical* graph from the copied env
                    new_entity_id = new_entity_ids[0] # assuming only one new entity is made, which is true as of (4/26)
                    score = self._score_function(new_entity_id, copied_env.graph)

                    # Verify the score is a number
                    if not isinstance(score, (int, float)):
                         # Use the actual ID in the error message
                        raise TypeError(f"Interestingness function returned non-numeric value for entity '{new_entity_id}': {type(score).__name__}")

                    logger.debug(f"Simulated action {action} -> entity '{new_entity_id}', score: {score}")
                    return action, float(score) # Ensure score is float

                except TypeError as te:
                    # Capture type errors specifically from the scoring function
                    has_type_error = True
                    type_error_message = str(te)
                    logger.warning(f"Type error scoring simulated entity '{new_entity_id}' (from action {action}): {te}")
                    return action, None # Failed scoring due to type error
                except Exception as e:
                    logger.warning(f"Error scoring simulated entity '{new_entity_id}' (from action {action}): {e}")
                    return action, None # Failed scoring due to other error

            except Exception as e:
                # Catch errors during env.step
                logger.warning(f"Error stepping environment during simulation for action {action}: {e}", exc_info=True)
                return action, None # Failed simulation step
            finally:
                pass # Need at least finally or except

        # --- Run simulations sequentially ---
        logger.debug(f"Starting sequential simulation for {len(actions_to_simulate)} actions.")
        start_time = time.monotonic()

        for action in actions_to_simulate:
            try:
                # Simulate the action directly
                result_action, score = _simulate_single_action(action)

                # Ensure the result corresponds to the correct action and score is valid
                if result_action == action and score is not None:
                    # Check score is finite
                    if np.isfinite(score):
                        simulated_results.append((action, score))
                    else:
                        logger.warning(f"Simulation for action {action} resulted in non-finite score: {score}. Excluding.")
                # No need to check result_action == action if called sequentially, but keep score check
                elif score is None:
                     # _simulate_single_action already logs reasons for returning None score
                     pass

            except Exception as exc:
                # Log exceptions raised by the _simulate_single_action task itself
                logger.error(f'Action {action} generated an exception during sequential simulation: {exc}', exc_info=True)

        # --- Calculate and log duration ---
        end_time = time.monotonic()
        duration = end_time - start_time
        logger.info(f"Simulated {len(actions_to_simulate)} actions sequentially in {duration:.2f} seconds.")
        # --- End duration logging ---

        logger.debug(f"Finished sequential simulations. Collected {len(simulated_results)} successful finite results.")

        # If we encountered type errors and have no valid results, raise the error
        if has_type_error and not simulated_results:
            raise TypeError(f"Interestingness function failed consistently with type errors: {type_error_message}")

        # Handle case where no actions could be successfully simulated and scored
        if not simulated_results:
            logger.info("No actions could be successfully simulated and scored with finite results.")
            # Fallback to random selection among the original candidate actions
            if candidate_actions:
                logger.info("Falling back to random selection among all candidate actions.")
                # Use self.rng for random choice
                return candidate_actions[self.rng.randint(0, len(candidate_actions))]
            logger.warning("No candidate actions available for fallback.")
            return None # No actions simulated successfully and no candidates to fall back on
        else:
            logger.info(f"Successfully simulated {len(simulated_results)} actions. Scores:")
            for sim_action, sim_score in simulated_results:
                rule_name_log = "Unknown Rule"
                if sim_action.rule_idx < len(self.rules):
                    rule_name_log = self.rules[sim_action.rule_idx].name
                
                input_node_names_log = []
                for node_id_log in sim_action.input_nodes:
                    try:
                        entity_log = graph.nodes[node_id_log]['entity']
                        input_node_names_log.append(entity_log.name if hasattr(entity_log, 'name') else node_id_log)
                    except KeyError:
                        input_node_names_log.append(f"{node_id_log} (not found in graph)")
                
                logger.info(f"  - Rule: '{rule_name_log}', Inputs: {input_node_names_log}, Simulated Score: {sim_score:.4f}")

        # Sample an action probabilistically based on scores
        actions, scores = zip(*simulated_results)
        scores_np = np.array(scores, dtype=float) # Ensure float numpy array

        # Handle case where all successful simulation scores are identical
        # Use a tolerance for floating point comparison
        if len(scores_np) > 0 and np.all(np.isclose(scores_np, scores_np[0])):
            logger.info(f"All {len(scores_np)} successfully simulated actions resulted in approximately the same score ({scores_np[0]:.4f}). Choosing uniformly.")
            # Use self.rng for random choice
            return actions[self.rng.randint(0, len(actions))]

        # Apply temperature and convert to probabilities using softmax
        # Shift scores for numerical stability (subtracting max doesn't change softmax outcome)
        scores_shifted = scores_np - np.max(scores_np)
        exp_scores = np.exp(scores_shifted / self.temperature)

        sum_exp_scores = np.sum(exp_scores)

        # Check for invalid sum (e.g., all scores were -inf after shift, or resulted in NaN/inf)
        if sum_exp_scores <= 0 or not np.isfinite(sum_exp_scores):
             logger.warning(f"Sum of exponentiated scores ({sum_exp_scores}) is invalid. Falling back to uniform probabilities among successful simulations.")
             probs = np.ones(len(actions)) / len(actions)
        else:
             probs = exp_scores / sum_exp_scores
             # Normalize probabilities to ensure they sum to 1, handling potential floating point inaccuracies
             probs /= np.sum(probs)


        # Ensure valid probability distribution before sampling
        if np.isnan(probs).any() or not np.isclose(np.sum(probs), 1.0):
            logger.warning(f"Calculated probability distribution is invalid (sum={np.sum(probs)}). Using uniform.")
            probs = np.ones(len(actions)) / len(actions)

        # Sample an action based on probabilities using self.rng
        try:
            chosen_index = self.rng.choice(len(actions), p=probs)
            selected_action = actions[chosen_index]
            selected_score = scores[chosen_index] # Original score for logging
        except ValueError as ve:
             # This might happen if probs still don't sum exactly to 1 due to float issues
             logger.warning(f"Error during weighted choice ({ve}). Falling back to uniform random choice among successful simulations.")
             chosen_index = self.rng.randint(0, len(actions))
             selected_action = actions[chosen_index]
             selected_score = scores[chosen_index]


        rule_name = self.rules[selected_action.rule_idx].name
        logger.info(f"Selected action (simulated) with rule '{rule_name}' (score: {selected_score:.4f}) using temperature {self.temperature}")
        return selected_action
    
    def _select_action_by_rule_random(self, env: MathEnv, candidate_actions: List[ValidAction]) -> Optional[ValidAction]:
        """
        Select an action by first randomly choosing a rule, then an action using that rule.
        
        This approach first groups actions by rule, then:
        1. Randomly selects a rule with available actions
        2. Randomly selects an action that uses that rule
        
        This ensures a more balanced exploration across different rule types.
        
        Args:
            env: The MathEnv environment
            candidate_actions: List of valid actions to choose from
            
        Returns:
            A selected ValidAction or None if no valid actions
        """
        logger.info(f"Using 'RULE_BASED_RANDOM' action selection strategy.")
        # Group actions by rule
        valid_actions_by_rule = defaultdict(list)
        for action in candidate_actions:
            rule_name = self.rules[action.rule_idx].name
            valid_actions_by_rule[rule_name].append(action)
        
        # Get all rule names
        rule_names = list(valid_actions_by_rule.keys())
        
        # Sample a random rule and then a random action from that rule
        selected_rule_name = rule_names[self.rng.randint(0, len(rule_names))]
        rule_actions = valid_actions_by_rule[selected_rule_name]
        selected_action = rule_actions[self.rng.randint(0, len(rule_actions))]
        
        logger.info(f"Selected action with rule '{selected_rule_name}'")
        return selected_action

    def update(self, 
              env: MathEnv,
              action: Union[int, ValidAction], 
              reward: float, 
              done: bool) -> None:
        """Receive feedback about a taken action; intentionally does nothing.

        This policy does not learn from rewards, so every argument is ignored.
        The hook is kept as an extension point for future reinforcement
        learning variants.

        Args:
            env: The environment
            action: The action that was taken
            reward: The reward received
            done: Whether the episode is done
        """
        # Nothing to learn: the policy is not adapted from observed rewards.
        return None

    def _score_entity_public(self, entity_id: str, graph: Optional[KnowledgeGraph] = None) -> float:
        """
        Public method for scoring entities, handling graph reference automatically.
        
        Args:
            entity_id: The ID of the entity to score
            graph: Optional knowledge graph (uses current graph if None)
            
        Returns:
            Interestingness score
            
        Raises:
            ValueError: If no graph is provided and no current graph is set
        """
        # Use provided graph or current graph
        graph_to_use = graph if graph is not None else self._current_graph
        
        if graph_to_use is None:
            raise ValueError("No graph provided for scoring and no current graph set")
            
        # Call the internal scoring function
        return self._score_function(entity_id, graph_to_use)

def main():
    """Demonstrate different configurations of the InterestingnessGuidedPolicy.

    Constructs five example policies (HR interestingness function, function
    loaded from a file path, custom scorer, comprehensibility only, and fully
    random) and logs how each one is configured. The policies are only
    constructed; none of them is run against an environment here.
    """
    # Import interestingness functions from DSL primitives
    from frame.interestingness.learning.dsl_primitives import (
        HR_INTERESTINGNESS_FUNCTION,
        recreate_comprehensibility,
    )

    # Setup logging
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    def describe(title, policy, detail):
        """Log an example's title, its two selection strategies and one detail line."""
        logger.info(f"\n{title}")
        logger.info(f"  Concept selection: {policy.concept_selection}")
        logger.info(f"  Action selection: {policy.action_selection}")
        logger.info(f"  {detail}")

    # Example 1: Policy with HR interestingness function
    # --------------------------------------------------
    hr_policy = InterestingnessGuidedPolicy(
        concept_selection=ConceptSelectionStrategy.INTERESTINGNESS,
        action_selection=ActionSelectionStrategy.SIMULATE_AND_SCORE,
        interestingness_scorer=HR_INTERESTINGNESS_FUNCTION,
        top_k_concepts=8,
        temperature=1.5
    )
    describe(
        "Example 1: Policy with HR interestingness functions",
        hr_policy,
        "Using combined comprehensibility and parsimony measures",
    )

    # Example 2: Policy using a function path
    # -----------------------------------------------
    function_path = "frame/interestingness/learning/generated_programs/hr_weighted.py"
    function_path_policy = InterestingnessGuidedPolicy(
        concept_selection=ConceptSelectionStrategy.INTERESTINGNESS,
        action_selection=ActionSelectionStrategy.RULE_BASED_RANDOM,
        interestingness_function_path=function_path,
        top_k_concepts=5,
        temperature=1.2
    )
    describe(
        "Example 2: Policy with function loaded from path",
        function_path_policy,
        f"Function path: {function_path}",
    )

    # Example 3: Policy with direct interestingness scoring (custom function)
    # ----------------------------------------------------
    def custom_interestingness_scorer(entity_id, graph):
        """A simple example interestingness scorer."""
        # In a real application, this would analyze properties of the entity
        # For this example, we'll just use a simple heuristic based on the entity_id
        return len(entity_id) / 10.0  # Simple normalization

    direct_policy = InterestingnessGuidedPolicy(
        concept_selection=ConceptSelectionStrategy.INTERESTINGNESS,
        action_selection=ActionSelectionStrategy.SIMULATE_AND_SCORE,
        interestingness_scorer=custom_interestingness_scorer,
        top_k_concepts=5,
        action_selection_params={'simulation_limit': 25},  # More simulations
        temperature=1.2
    )
    describe(
        "Example 3: Policy with direct interestingness scoring",
        direct_policy,
        f"Simulation limit: {direct_policy.simulation_limit}",
    )

    # Example 4: Policy using just one HR function (comprehensibility)
    # ---------------------------------------------------------------
    comprehensibility_policy = InterestingnessGuidedPolicy(
        concept_selection=ConceptSelectionStrategy.INTERESTINGNESS,
        action_selection=ActionSelectionStrategy.RULE_BASED_RANDOM,
        interestingness_scorer=recreate_comprehensibility,
        top_k_concepts=6,
        temperature=1.0
    )
    describe(
        "Example 4: Policy using just comprehensibility",
        comprehensibility_policy,
        "Using only the comprehensibility measure",
    )

    # Example 5: Random policy
    # -----------------------
    # This example uses random selection for both concepts and actions
    random_policy = InterestingnessGuidedPolicy(
        concept_selection=ConceptSelectionStrategy.RANDOM,
        action_selection=ActionSelectionStrategy.PURE_RANDOM,
        top_k_concepts=10
    )
    describe(
        "Example 5: Random policy",
        random_policy,
        "This policy performs pure random exploration.",
    )

    # Summary
    logger.info("\nSummary:")
    logger.info("The InterestingnessGuidedPolicy is a flexible policy framework that can:")
    logger.info("1. Use interestingness functions directly from code")
    logger.info("2. Load interestingness functions from Python files")
    logger.info("3. Perform pure random exploration")
    logger.info("\nBy configuring concept selection and action selection independently,")
    logger.info("this policy can be adapted to many different exploration strategies.")


# Run the configuration demo only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()