"""Evaluation function for creative reasoning solutions."""

import json
import hashlib
from typing import List, Tuple, Optional, Dict, Any
import numpy as np
from pathlib import Path

from src.data_models.task_config import TaskConfig
from src.data_models.evaluation_result import EvaluationResult
from src.utils.llm_api_client import LLMAPIClient


def _save_evaluation_intermediate_logs(evaluation_llm_interactions: List[dict], run_id: str, solution_id: str) -> str:
    """Save evaluation-specific intermediate logs to a JSON file.

    The file is named ``eval_<run_id>_<solution_id>.json`` and is written to
    the ``results/intermediate_logs`` directory resolved relative to this
    module (``Path(__file__).parent.parent.parent``). The directory is
    created if it does not exist.

    Args:
        evaluation_llm_interactions: List of LLM interaction dictionaries
        run_id: Unique identifier for this run
        solution_id: Unique identifier for this solution

    Returns:
        Relative path to the saved JSON file (``intermediate_logs/<file>``)

    Raises:
        IOError: If the directory cannot be created or the file cannot be
            written. The original OS error is attached as ``__cause__``.
    """
    results_dir = Path(__file__).parent.parent.parent / "results" / "intermediate_logs"
    log_filename = f"eval_{run_id}_{solution_id}.json"
    log_file_path = results_dir / log_filename

    # Create results directory if it doesn't exist
    try:
        results_dir.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        raise IOError(f"Failed to create results directory {results_dir}: {e}") from e

    # Serialize before opening the file: if an entry is not JSON-serializable
    # the error surfaces here and no truncated log file is left on disk.
    payload = json.dumps(evaluation_llm_interactions, indent=2, ensure_ascii=False)

    try:
        with open(log_file_path, 'w', encoding='utf-8') as f:
            f.write(payload)
    except IOError as e:
        raise IOError(f"Failed to write evaluation intermediate log file {log_file_path}: {e}") from e

    return f"intermediate_logs/{log_filename}"


def run_evaluation(solution_text: str, task_config: TaskConfig, run_id: str, num_final_solutions: int = 3, pre_extracted_solutions_with_themes: Optional[List[Dict[str, Any]]] = None) -> List[EvaluationResult]:
    """Run evaluation on a solution and return scores.

    Individual solutions are either taken from
    ``pre_extracted_solutions_with_themes`` (when it is non-empty) or
    extracted from ``solution_text`` with an LLM call. Each solution is then
    scored for feasibility, utility and novelty, the scores are combined
    into a creativity score, and the LLM interaction log for that solution
    is saved on a best-effort basis.

    Args:
        solution_text: The generated solution text to evaluate (potentially
            multi-solution). Must be a non-empty string even when
            pre-extracted solutions are supplied.
        task_config: Task configuration object containing checkpoints and known solutions
        run_id: Unique identifier for this run
        num_final_solutions: Maximum number of solutions to extract and
            evaluate (only applied to LLM extraction)
        pre_extracted_solutions_with_themes: Optional list of dicts with a
            ``solution_text`` key and optional ``original_solution_id`` and
            ``novelty_theme`` keys. Entries that are not dicts or lack
            ``solution_text`` are skipped with a warning.

    Returns:
        List of EvaluationResult objects, one for each extracted solution

    Raises:
        ValueError: If inputs are invalid or no solution could be obtained
        Exception: For LLM API failures or parsing errors during extraction
    """
    from datetime import datetime

    # Validate inputs
    if not isinstance(solution_text, str):
        raise ValueError("solution_text must be a string")

    if not isinstance(task_config, TaskConfig):
        raise ValueError("task_config must be a TaskConfig object")

    if not solution_text.strip():
        raise ValueError("solution_text cannot be empty")

    # Initialize LLM API client
    llm_client = LLMAPIClient()

    using_pre_extracted = bool(
        pre_extracted_solutions_with_themes and len(pre_extracted_solutions_with_themes) > 0
    )

    # Step 1: Build a list of (solution_id, text, pre-existing theme) triples.
    # The theme travels with its solution so that skipping a malformed
    # pre-extracted entry cannot shift themes onto the wrong solution.
    solutions_to_evaluate = []
    if using_pre_extracted:
        # Use pre-extracted solutions, bypassing LLM call
        print("Using pre-extracted solutions, skipping LLM extraction...")
        for i, solution_data in enumerate(pre_extracted_solutions_with_themes):
            if isinstance(solution_data, dict) and 'solution_text' in solution_data:
                solution_id = solution_data.get('original_solution_id', f'sol_{i+1}_pre_extracted')
                solutions_to_evaluate.append(
                    (solution_id, solution_data['solution_text'], solution_data.get('novelty_theme'))
                )
            else:
                print(f"Warning: Invalid solution data format at index {i}, skipping...")

        if not solutions_to_evaluate:
            raise ValueError("No valid pre-extracted solutions found")
    else:
        # Use LLM to extract solutions; no pre-existing themes are available
        extracted_solutions = _extract_solutions(llm_client, solution_text, num_final_solutions)

        if not extracted_solutions:
            raise ValueError("No valid solutions could be extracted from the input text")

        solutions_to_evaluate = [
            (solution_id, extracted_text, None)
            for solution_id, extracted_text in extracted_solutions
        ]

    # Step 2: Evaluate each extracted solution
    evaluation_results = []

    for solution_id, candidate_text, pre_existing_novelty_theme in solutions_to_evaluate:
        print(f"Evaluating solution: {solution_id} out of {len(solutions_to_evaluate)}")

        # Per-solution list that the scoring helpers append their LLM logs to
        evaluation_llm_interactions = []

        # Add audit log entry if using pre-extracted solutions
        if using_pre_extracted:
            audit_entry = {
                "step_name": "Evaluation: Solution Canonicalization and Extraction (Skipped - Pre-existing)",
                "llm_model_name": "N/A",
                "temperature": 0.0,
                "timestamp": datetime.now().isoformat(),
                "prompt": "N/A",
                "raw_response": "N/A",
                "parsed_output": "Pre-extracted solutions used",
                "error": "None"
            }
            evaluation_llm_interactions.append(audit_entry)

        # Calculate feasibility score with reasoning
        feasibility_score, feasibility_reasoning = _calculate_feasibility_score(
            llm_client, candidate_text, task_config.feasibility_check_points, evaluation_llm_interactions
        )

        # Calculate utility score with reasoning
        utility_score, utility_reasoning = _calculate_utility_score(
            llm_client, candidate_text, task_config, evaluation_llm_interactions
        )

        # Calculate novelty score with theme (reuses the pre-existing theme if any)
        novelty_score, novelty_theme = _calculate_novelty_score(
            llm_client, candidate_text, task_config, evaluation_llm_interactions, pre_existing_novelty_theme
        )

        # Calculate overall creativity score
        creativity_score = _calculate_creativity_score(
            feasibility_score, utility_score, novelty_score
        )

        # Saving logs is best-effort: a write failure must not discard the scores
        try:
            intermediate_log_filename = _save_evaluation_intermediate_logs(
                evaluation_llm_interactions, run_id, solution_id
            )
        except IOError as e:
            print(f"Warning: Failed to save evaluation intermediate logs for {solution_id}: {e}")
            intermediate_log_filename = None

        # Create evaluation result
        result = EvaluationResult(
            original_solution_id=solution_id,
            individual_solution_text=candidate_text,
            feasibility_score=round(feasibility_score, 3),
            utility_score=round(utility_score, 3),
            novelty_score=round(novelty_score, 3),
            creativity_score=round(creativity_score, 3),
            intermediate_log_filename=intermediate_log_filename,
            feasibility_reasoning=feasibility_reasoning,
            utility_reasoning=utility_reasoning,
            novelty_theme=novelty_theme
        )

        evaluation_results.append(result)

    return evaluation_results


def _extract_solutions(llm_client: LLMAPIClient, solution_text: str, num_final_solutions: int) -> List[Tuple[str, str]]:
    """Extract and canonicalize individual solutions from raw text using LLM.

    The LLM is asked to return a JSON array of paragraphs. Non-string or
    blank entries are dropped. Each remaining entry gets an ID of the form
    ``sol_<position>_<hash>``, where ``<position>`` is the 1-based index in
    the LLM's array and ``<hash>`` is the first 8 hex digits of the SHA-256
    of the paragraph as returned by the LLM.

    Args:
        llm_client: LLM API client instance
        solution_text: Raw text potentially containing multiple solutions
        num_final_solutions: Maximum number of solutions to extract

    Returns:
        List of tuples: (solution_id, canonicalized_solution_text)

    Raises:
        Exception: If LLM extraction fails or returns invalid format. The
            underlying error is attached as ``__cause__``.
    """
    prompt = f"""
You are a solution processing assistant. Your task is to identify all individual solutions from the text provided and canonicalize each one into a concise paragraph that functions as an operational blueprint.

**Text to Process:**
{solution_text}

---

**Instructions:**
1.  First, scan the text and identify the boundaries of each distinct solution.
2.  For each solution, author a self-contained paragraph describing its **core functional mechanism**. This paragraph must identify the essential components, their primary inputs and outputs, and explain **how they interact** to produce the final result.
3.  If an inspirational domain is mentioned, include it for context.
4.  The level of detail must be sufficient to make the process unambiguous and reproducible in principle, but should **not be a granular, step-by-step implementation guide.** Omit low-level details like specific parameters or mathematical formulas.

Return your response as a single, valid JSON array of strings, where each string is a descriptive paragraph.

**Example of the Process:**
* **Input Text:** "Our best strategy is the Rolling reservations with batch-optimal switching: We'll collect intended crossings via an app and maintain a rolling horizon of timestamped requests. Then, we form same-direction batches of at least size E and apply a myopic marginal-cost rule to decide when to switch directions, which is much more efficient."
* **Desired Canonicalized Output in JSON:** `["The core functional mechanism is a reservation-based batch processing system. The essential components are a user application for submitting crossing requests (input) and a central scheduler. The scheduler collects these requests into a rolling time horizon and groups them into same-direction batches of a minimum size. It then uses a myopic marginal-cost algorithm to determine the optimal time to switch traffic directions (output), an interaction designed to minimize overall delay."]`

**Example format for the final output:**
["Descriptive paragraph for solution 1.", "Descriptive paragraph for solution 2.", ...]

Return ONLY the JSON array now:
"""

    try:
        print("🔄 Starting LLM solution extraction and canonicalization...")
        response = llm_client.call_gemini(
            prompt=prompt,
            model_name="gemini-2.5-pro",
            temperature=0.0
        )
        print("✅ Solution extraction and canonicalization completed")

        # Debug: print the raw response
        print(f"Debug - Raw LLM response: {repr(response)}")

        # Parse JSON response using the utility function
        from src.utils.llm_response_parser import extract_json_from_response
        solutions = extract_json_from_response(response)

        if not isinstance(solutions, list):
            raise ValueError("LLM response is not a list")

        # Create solution IDs and filter out empty solutions
        extracted_solutions = []
        for i, solution in enumerate(solutions):
            if isinstance(solution, str) and solution.strip():
                # Short content hash keeps IDs distinguishable across runs
                content_hash = hashlib.sha256(solution.encode()).hexdigest()[:8]
                solution_id = f"sol_{i+1}_{content_hash}"
                extracted_solutions.append((solution_id, solution.strip()))

        # Limit to num_final_solutions
        return extracted_solutions[:num_final_solutions]

    except json.JSONDecodeError as e:
        # Must precede the generic handler: JSONDecodeError is an Exception subclass
        raise Exception(f"Failed to parse LLM response as JSON: {e}") from e
    except Exception as e:
        raise Exception(f"Solution extraction failed: {e}") from e


def _calculate_feasibility_score(
    llm_client: LLMAPIClient, 
    solution_text: str, 
    check_points: List[str],
    evaluation_llm_interactions: List[dict]
) -> Tuple[float, str]:
    """Calculate feasibility score based on check points.

    The LLM is asked to rate the solution against the check points and to
    return a JSON object with ``score`` and ``reasoning``. Exactly one
    interaction entry is appended to ``evaluation_llm_interactions`` per
    call, whether the call succeeds, the LLM call fails, or the response
    cannot be parsed.

    Args:
        llm_client: LLM API client instance
        solution_text: Individual solution text to evaluate
        check_points: List of feasibility check points
        evaluation_llm_interactions: List to capture LLM interaction logs
            (mutated in place)

    Returns:
        Tuple of (feasibility score from 0.0 to 1.0, reasoning). On any
        failure the score is 0.5 and the reasoning describes the error.
    """
    from datetime import datetime

    check_points_text = "\n".join([f"- {point}" for point in check_points])

    prompt = f"""
Evaluate the feasibility of the following solution against these check points:

Solution: {solution_text}

Check Points:
{check_points_text}

Return a JSON object with two fields:
- "score": a number between 0.0 and 1.0 representing the feasibility score, where:
  - 0.0 = None of the check points are met
  - 1.0 = All check points are fully met
  - Values in between represent partial fulfillment
- "reasoning": a 1-2 sentence explanation of your evaluation

Please score it on the assumption that the underlying technology and strategy are sound.

Example format:
{{"score": 0.8, "reasoning": "The solution addresses most check points but lacks detail on implementation timeline."}}

Return the JSON now:
"""

    # Phase 1: the LLM call. Handled separately from parsing so that a
    # failure here never touches a log entry that does not exist yet.
    try:
        print("🔄 Starting LLM feasibility evaluation...")
        response = llm_client.call_gemini(
            prompt=prompt,
            model_name="gemini-2.5-pro",
            temperature=0
        )
        print("✅ Feasibility evaluation completed")
    except Exception as e:
        # Fallback to default score if LLM call fails
        evaluation_llm_interactions.append({
            "prompt": prompt,
            "raw_response": "",
            "model": "gemini-2.5-pro",
            "temperature": 0,
            "timestamp": datetime.now().isoformat(),
            "error": str(e)
        })
        return 0.5, f"LLM call failed: {e}"

    # Capture LLM interaction
    interaction_log = {
        "prompt": prompt,
        "raw_response": response,
        "model": "gemini-2.5-pro",
        "temperature": 0,
        "timestamp": datetime.now().isoformat(),
        "error": None
    }
    evaluation_llm_interactions.append(interaction_log)

    # Phase 2: parsing. Errors are recorded on the entry logged above
    # instead of appending a second, misleading "LLM call failed" entry.
    try:
        from src.utils.llm_response_parser import extract_score_and_reasoning_from_response
        score, reasoning = extract_score_and_reasoning_from_response(response)

        # Ensure score is within valid range
        score = max(0.0, min(1.0, score))
    except Exception as e:
        # Fallback to default score if parsing fails
        interaction_log["error"] = str(e)
        return 0.5, f"Failed to parse feasibility response: {e}"

    return score, reasoning


def _calculate_utility_score(
    llm_client: LLMAPIClient, 
    solution_text: str, 
    task_config: TaskConfig,
    evaluation_llm_interactions: List[dict]
) -> Tuple[float, str]:
    """Calculate utility score relative to the task's known solutions.

    The LLM rates the solution on a 0-10 scale against the known solutions
    (and the first optimal solution description, when present). The rating
    is divided by 10 and clamped to [0.0, 1.0]. Exactly one interaction
    entry is appended to ``evaluation_llm_interactions`` per call.

    Args:
        llm_client: LLM API client instance
        solution_text: Individual solution text to evaluate
        task_config: TaskConfig object containing task information and calibration anchors
        evaluation_llm_interactions: List to capture LLM interaction logs
            (mutated in place)

    Returns:
        Tuple of (utility score from 0.0 to 1.0, reasoning). A response
        without a ``score`` field is treated as 5/10. On any failure the
        score is the neutral 0.5 and the reasoning describes the error.
    """
    from datetime import datetime

    # Format known solutions as a numbered list to avoid f-string formatting issues
    known_solutions_text = "\n".join([f"{i+1}. {solution}" for i, solution in enumerate(task_config.known_solutions)])

    # Format optimal solution as upper bound reference if available
    optimal_solution_text = ""
    if task_config.optimal_solutions_description and len(task_config.optimal_solutions_description) > 0:
        optimal_solution = task_config.optimal_solutions_description[0]
        optimal_solution_text = f"**Optimal Solution Reference (Score 10 - Maximum Utility):**\n{optimal_solution}\n\n"

    # NOTE(review): the prompt below mixes "percentile" wording with an
    # "absolute improvement" rubric, and describes a score of 5 as an
    # incremental improvement while the rubric assigns "Incremental" to 0-2.
    # The text is left unchanged here because editing it alters scoring.
    evaluation_prompt = f"""
You are an expert systems evaluator. Your task is to assess the utility of the "Current Solution" by placing it on a percentile scale relative to the provided "Known Solutions."

{optimal_solution_text}**Task Description:**
{task_config.task_description}

---

**Known Solutions (Consider these a representative sample of standard, conventional approaches):**
{known_solutions_text}

---

**Current Solution to Evaluate:**
{solution_text}

---

**Instructions:**
1.  Analyze the Current Solution's effectiveness at achieving the primary objective in the Task Description.
2.  Compare this effectiveness to the quality of the Known Solutions.
3.  Place the Current Solution on a scale from 0 to 10 based on the rubric below. A score of 5 means it is a minor, incremental improvement over the average known solution.

Evaluate the effectiveness of the core idea itself, not the quality of its description or implementation plan.

**Scoring Rubric (Absolute Improvement Scale):**
* **0-2 (Incremental):** A minor or trivial improvement over the baseline. The core strategy is still conventional and offers only marginal gains.
* **3-5 (Competent):** A solid, well-defined solution that is clearly more effective than the baseline. It represents a competent, but not necessarily innovative, approach.
* **6-8 (Significant):** A significant conceptual or practical improvement. The strategy is advanced, highly effective, and demonstrates a sophisticated understanding of the problem's dynamics.
* **9-10 (Transformative):** A state-of-the-art or paradigm-shifting approach. The solution reframes the problem or introduces a mechanism that is fundamentally superior to conventional strategies.

Return a single JSON object with two fields:
-   **`score`**: A single integer from 0 to 10 representing the placement.
-   **`reasoning`**: A 1-2 sentence justification for the score, explicitly referencing the rubric and comparing the Current Solution to the Known Solutions.

**Example format:**
```json
{{
  "score": 6,
  "reasoning": "The solution is a significant improvement because its demand-shaping mechanism is a more advanced concept than the simple reactive scheduling seen in the known solutions, placing it in the 61-80th percentile."
}}
```
"""

    # Phase 1: the LLM call
    try:
        print(f"🔄 Starting LLM utility evaluation for solution and {len(task_config.known_solutions)} known solutions...")
        response = llm_client.call_gemini(
            prompt=evaluation_prompt,
            model_name="gemini-2.5-pro",
            temperature=0
        )
        print("✅ Utility evaluation completed")
    except Exception as e:
        print(f"Utility score calculation failed: {e}")
        evaluation_llm_interactions.append({
            "prompt": evaluation_prompt,
            "raw_response": "",
            "model": "gemini-2.5-pro",
            "temperature": 0,
            "timestamp": datetime.now().isoformat(),
            "error": str(e)
        })
        # Neutral fallback: 1.0 would award maximum utility to a failed call
        return 0.5, f"LLM call failed: {e}"

    # Capture LLM interaction
    interaction_log = {
        "prompt": evaluation_prompt,
        "raw_response": response,
        "model": "gemini-2.5-pro",
        "temperature": 0,
        "timestamp": datetime.now().isoformat(),
        "error": None
    }
    evaluation_llm_interactions.append(interaction_log)

    # Phase 2: parsing. Errors are recorded on the entry logged above.
    try:
        from src.utils.llm_response_parser import extract_json_from_response
        result = extract_json_from_response(response)

        # Rescale 0-10 to 0-1 and clamp, since the LLM may answer out of range
        current_score = max(0.0, min(1.0, result.get("score", 5) / 10.0))
        reasoning = result.get("reasoning", "No reasoning provided")
    except Exception as e:
        print(f"Utility score calculation failed: {e}")
        interaction_log["error"] = str(e)
        return 0.5, f"Failed to parse utility response: {e}"

    return current_score, reasoning


def _calculate_novelty_score(
    llm_client: LLMAPIClient, 
    solution_text: str, 
    task_config: TaskConfig,
    evaluation_llm_interactions: List[dict],
    pre_existing_novelty_theme: Optional[str] = None
) -> Tuple[float, str]:
    """Calculate novelty score using LLM theme extraction and embeddings.

    Steps:
      1. Obtain a one-phrase theme for the solution, either the supplied
         ``pre_existing_novelty_theme`` or one extracted by the LLM.
      2. Embed the theme together with the reference concepts
         (``task_config.known_solutions_concept`` if non-empty, otherwise
         ``task_config.known_solutions``).
      3. Raw novelty = 1 - (highest cosine similarity to any reference).
      4. If optimal solution concepts exist, divide the raw novelty by the
         largest known-to-optimal cosine distance (capped at 1.0).

    Args:
        llm_client: LLM API client instance
        solution_text: Individual solution text to evaluate
        task_config: TaskConfig object containing known solutions and concepts
        evaluation_llm_interactions: List to capture LLM interaction logs
            (mutated in place)
        pre_existing_novelty_theme: Optional pre-existing novelty theme to use

    Returns:
        Tuple of (novelty score from 0.0 to 1.0, extracted theme). If there
        are no reference concepts the score is 0.5. On any failure the
        score is 0.5 and the theme is an empty string.
    """
    # Imported and initialised before the try block so the fallback handler
    # can always build its log entry, whichever step failed.
    from datetime import datetime
    theme_extraction_prompt = "N/A"

    try:
        # Step 1: Extract main theme from the current solution using LLM or use pre-existing theme
        if pre_existing_novelty_theme and pre_existing_novelty_theme.strip():
            # Use pre-existing theme, bypassing LLM call
            current_theme = pre_existing_novelty_theme.strip()
            print(f"✅ Using pre-existing novelty theme: '{current_theme}'")

            # Add audit log entry for skipped LLM call
            audit_entry = {
                "step_name": "Evaluation: Novelty Key Phrase Extraction (Skipped - Pre-existing)",
                "llm_model_name": "N/A",
                "temperature": 0.0,
                "timestamp": datetime.now().isoformat(),
                "prompt": "N/A",
                "raw_response": "N/A",
                "parsed_output": "Pre-existing novelty theme used",
                "error": "None"
            }
            evaluation_llm_interactions.append(audit_entry)
        else:
            # Extract theme using LLM
            print("🔄 Starting LLM theme extraction for current solution...")
            theme_extraction_prompt = f"""
Deconstruct the following solution into its core operational logic to create a "conceptual fingerprint." Your goal is to create a single, dense phrase that describes the solution's fundamental system of operation, ignoring prose and justifications.

**To do this, you will:**
1.  **Identify the Core Mechanism:** The central process or tool (e.g., a timer, an auction, a sensor).
2.  **Identify the Actors & Interaction:** The key components and what they do (e.g., vehicles submit bids).
3.  **Identify the Governing Principle:** The underlying rule that dictates the outcome (e.g., fixed intervals, willingness to pay).
4.  **Synthesize:** Combine these elements into a single, logical phrase that describes the system.

**Crucial Rule:** Do NOT summarize into high-level keywords. The output must describe the unique *process* or *system rule*.

**Solution to Analyze:**
{solution_text}

**Return format:**
"A system where [actors] [interaction] based on the principle of [governing principle]."

**Examples of Correct Output:**
- "A system where a timer dictates a mandatory directional switch at a fixed interval, irrespective of real-time traffic demand."
- "A system where a sensor triggers a directional switch only when the queue of vehicles on the currently active side becomes empty."
- "A system that processes a fixed number of vehicles as a single batch and then triggers a mandatory directional switch upon that batch's completion."
"""

            response = llm_client.call_gemini(
                prompt=theme_extraction_prompt,
                model_name="gemini-2.5-pro",
                temperature=0
            )
            current_theme = response.strip()
            print(f"✅ Theme extraction completed: '{current_theme}'")

            # Capture LLM interaction for theme extraction
            interaction_log = {
                "prompt": theme_extraction_prompt,
                "raw_response": response,
                "model": "gemini-2.5-pro",
                "temperature": 0,
                "timestamp": datetime.now().isoformat(),
                "error": None
            }
            evaluation_llm_interactions.append(interaction_log)

        # Step 2: Pick the reference texts; concept summaries are preferred
        # over the full known-solution descriptions when they exist.
        if task_config.known_solutions_concept and len(task_config.known_solutions_concept) > 0:
            solutions_for_initial_novelty = task_config.known_solutions_concept
        else:
            solutions_for_initial_novelty = task_config.known_solutions

        # Step 3: Generate embeddings for current theme and reference texts
        initial_themes = [current_theme] + solutions_for_initial_novelty
        print(f"🔄 Starting embedding generation for {len(initial_themes)} themes...")
        initial_embeddings = llm_client.embed_content("gemini-embedding-001", initial_themes)
        print("✅ Initial embedding generation completed")

        if len(initial_embeddings) != len(initial_themes):
            raise Exception("Initial embedding count mismatch")

        current_embedding = initial_embeddings[0]
        known_concept_embeddings = initial_embeddings[1:]

        # Step 4: Similarity of the current theme to every reference
        initial_similarities = [
            _cosine_similarity(current_embedding, known_concept_embedding)
            for known_concept_embedding in known_concept_embeddings
        ]

        # Handle edge case: if no known solutions concepts, return default score
        if not initial_similarities:
            return 0.5, current_theme

        # Novelty is the distance to the *closest* known concept
        initial_novelty_score = 1.0 - max(initial_similarities)

        # Step 5: Largest distance between any known concept and any optimal concept
        max_known_optimal_distance = 0.0
        if (task_config.optimal_solutions_concept and 
            len(task_config.optimal_solutions_concept) > 0 and 
            len(known_concept_embeddings) > 0):

            print(f"🔄 Starting embedding generation for {len(task_config.optimal_solutions_concept)} optimal concepts...")
            optimal_embeddings = llm_client.embed_content("gemini-embedding-001", task_config.optimal_solutions_concept)
            print("✅ Optimal embedding generation completed")

            for known_concept_embedding in known_concept_embeddings:
                for optimal_concept_embedding in optimal_embeddings:
                    distance = 1.0 - _cosine_similarity(known_concept_embedding, optimal_concept_embedding)
                    max_known_optimal_distance = max(max_known_optimal_distance, distance)

        # Step 6: Normalize against the known-to-optimal benchmark distance
        if max_known_optimal_distance > 0:
            novelty_score = min(initial_novelty_score / max_known_optimal_distance, 1.0)
        else:
            # If no optimal solutions or max distance is 0, use the initial score as-is
            novelty_score = initial_novelty_score

        return max(0.0, min(1.0, novelty_score)), current_theme

    except Exception as e:
        # Fallback to default score if theme extraction or embedding fails
        print(f"Novelty score calculation failed: {e}")
        error_theme = ""
        interaction_log = {
            # "N/A" when the pre-existing theme path was used (no prompt built)
            "prompt": theme_extraction_prompt,
            "raw_response": "",
            "model": "gemini-2.5-pro",
            "temperature": 0,
            "timestamp": datetime.now().isoformat(),
            "error": str(e)
        }
        evaluation_llm_interactions.append(interaction_log)
        return 0.5, error_theme


def _cosine_similarity(vec1: List[float], vec2: List[float]) -> float:
    """Calculate cosine similarity between two vectors.
    
    Args:
        vec1: First vector
        vec2: Second vector
        
    Returns:
        Cosine similarity score from -1.0 to 1.0
    """
    try:
        # Convert to numpy arrays for efficient calculation
        v1 = np.array(vec1)
        v2 = np.array(vec2)
        
        # Calculate cosine similarity
        dot_product = np.dot(v1, v2)
        norm_v1 = np.linalg.norm(v1)
        norm_v2 = np.linalg.norm(v2)
        
        if norm_v1 == 0 or norm_v2 == 0:
            return 0.0
        
        similarity = dot_product / (norm_v1 * norm_v2)
        return float(similarity)
        
    except Exception:
        # Fallback to simple dot product if numpy fails
        return 0.0


def _calculate_creativity_score(
    feasibility_score: float, 
    utility_score: float, 
    novelty_score: float
) -> float:
    """Calculate overall creativity score as weighted average.
    
    Args:
        feasibility_score: Feasibility score (0.0-1.0)
        utility_score: Utility score (0.0-1.0)
        novelty_score: Novelty score (0.0-1.0)
        
    Returns:
        Overall creativity score (0.0-1.0)
    """
    # Weighted average: feasibility (30%), utility (30%), novelty (40%)
    creativity_score = (
        0.5 * utility_score + 
        0.5 * novelty_score
    )
    
    return max(0.0, min(1.0, creativity_score))
