# pip install dspy-ai python-dotenv numpy
import dspy
import os
from pathlib import Path
from dotenv import load_dotenv
load_dotenv()

import numpy as np
from experiment import Experiment

def run_experiment(system_prompt: str, instance_prompt: str, exp_num: int) -> float:
    """Run a single ``Experiment`` and return its ``p2p_success`` result.

    Args:
        system_prompt: Accepted by the signature but not used by the body.
        instance_prompt: Accepted by the signature but not used by the body.
        exp_num: Experiment number forwarded to ``Experiment``.

    Returns:
        The ``p2p_success`` attribute of the experiment's ``result`` after
        ``run()`` has completed.
    """
    # NOTE(review): neither prompt argument reaches the experiment yet
    # (``designed_agent_config`` is None and the tool/instance lists are
    # empty) -- presumably they are meant to be wired in here; confirm.
    experiment_kwargs = {
        "evolution_output_dir": Path("tool_gen/generated_dspy"),
        "exp_num": exp_num,
        "chosen_tools": [],
        "instances": [],
        "template_dir": Path("tool_gen/template"),
        "designed_agent_config": None,
    }
    trial = Experiment(**experiment_kwargs)
    trial.run()
    return trial.result.p2p_success





###############################################################################
# 0) Configure your base LM (any provider DSPy supports)
###############################################################################
# Example: Anthropic; change to your stack (e.g., vLLM, OpenAI, etc.)
# The API key is read from the ANTHROPIC_API_KEY environment variable.
# load_dotenv() has already run at the top of the file, so a local .env file
# can supply it; os.getenv returns None when the variable is unset.
lm = dspy.LM(
    model="anthropic/claude-3-5-sonnet-20241022", 
    api_key=os.getenv("ANTHROPIC_API_KEY"),
    max_tokens=4000
)
# Hand the LM to DSPy's global configuration. This is a module-level
# statement, so it executes whenever the file is imported or run.
dspy.configure(lm=lm)

###############################################################################
# 1) Define the signature for prompt generation
###############################################################################
# Signature wrapped by dspy.ChainOfThought inside PromptOptimizer: one input
# field (design_notes) and two output fields (system_prompt, instance_prompt).
# NOTE(review): per the DSPy Signatures documentation the class docstring is
# used as the task instructions given to the LM, so the docstring below is
# prompt content rather than ordinary documentation -- confirm before
# rewording it.
class PromptGeneratorSignature(dspy.Signature):
    """Generate high-quality prompts for SWE-agent.
    
    Create:
    1) system_prompt: concise system message guiding the agent's role, safety,
       coding standards, and tool-use policy.
    2) instance_prompt: a template with slots like {issue}, {repo_context}, etc.,
       that elicits concrete, testable patches and commands.
    """
    # Input: free-form constraints the generated prompts should respect.
    design_notes: str = dspy.InputField(desc="Constraints and guidelines for prompt design")
    # Output: the system-level message for the agent.
    system_prompt: str = dspy.OutputField(desc="System-level instructions for the agent")
    # Output: the per-instance template; the placeholders are expected to
    # appear literally in the generated text.
    instance_prompt: str = dspy.OutputField(desc="Instance template with placeholders")

###############################################################################
# 2) Define the main program for prompt optimization
###############################################################################
class PromptOptimizer(dspy.Module):
    """DSPy module that generates a system prompt and an instance template.

    The module stores a fixed ``design_notes`` string and feeds it to a
    ``dspy.ChainOfThought`` predictor built on ``PromptGeneratorSignature``.
    """

    def __init__(self, design_notes: str = ""):
        """Store the design notes and build the chain-of-thought generator.

        Args:
            design_notes: Constraints passed to the generator on every call.
        """
        super().__init__()
        self.design_notes = design_notes
        self.generator = dspy.ChainOfThought(PromptGeneratorSignature)

    def forward(self) -> dict[str, str]:
        """Generate both prompts from the stored design notes.

        Takes no arguments: the only input is ``self.design_notes``.

        Returns:
            A dict with the keys ``"system_prompt"`` and ``"instance_prompt"``
            taken from the generator's output.
        """
        generated = self.generator(design_notes=self.design_notes)
        output_fields = ("system_prompt", "instance_prompt")
        return {name: getattr(generated, name) for name in output_fields}

###############################################################################
# 3) Evaluation metric for prompt quality
###############################################################################
def evaluate_prompts(example, prediction, trace=None) -> float:
    """Score a pair of generated prompts (placeholder implementation).

    The parameters follow the ``metric(example, prediction, trace)`` call
    shape, but none of them is inspected yet: the score is a single draw from
    ``np.random.random()``.

    Args:
        example: The input example; currently ignored.
        prediction: The generated prompts; currently ignored.
        trace: Optional trace information; currently ignored.

    Returns:
        A float in the half-open interval [0, 1); higher is meant to be better.
    """
    # TODO: replace the random draw with a real evaluation, e.g. the value
    # returned by run_experiment(...) for the prompts in `prediction`.
    return float(np.random.random())

###############################################################################
# 4) Setup training data and optimization
###############################################################################
# Design constraints that prompts should respect.
# This text is passed verbatim as the `design_notes` input of the generator,
# so it is prompt content: wording changes here change what the LM is asked.
design_notes = """
- Keep tool calls JSON-only and single-line; never hallucinate paths.
- Prefer minimal diffs; justify each hunk against failing tests.
- Require a final 'Run tests' step; summarize failing->passing tests delta.
- Instance template MUST include placeholders: {issue}, {repo_context}, {history}.
"""

# Create examples for optimization.
# The single Example carries no fields and declares no input keys, which
# matches PromptOptimizer.forward() taking no arguments.
examples = [
    dspy.Example().with_inputs()  # Empty example since we're optimizing prompt generation itself
]

# Initialize the program (the "student" handed to the optimizer below).
program = PromptOptimizer(design_notes=design_notes)

###############################################################################
# 5) Configure and run optimization
###############################################################################
# NOTE(review): everything in this section is a module-level statement, so the
# optimizer is built and compile() is called as soon as this file is imported
# or executed -- before the `if __name__ == "__main__"` guard at the bottom.
# Use MIPROv2 with the "medium" auto preset. The metric is evaluate_prompts,
# which currently returns a random placeholder score.
teleprompter = dspy.MIPROv2(
    metric=evaluate_prompts,
    auto="medium"  # Auto-configure hyperparameters (overrides num_candidates)
)

# Compile the optimized program.
# trainset and valset are the same one-element `examples` list.
optimized_program = teleprompter.compile(
    student=program,
    trainset=examples,
    valset=examples,  # Using same set for simplicity
    requires_permission_to_run=False
)

###############################################################################
# 6) Generate and use optimized prompts
###############################################################################
def main() -> None:
    """Call the optimized program once and print both generated prompts.

    Reads the module-level ``optimized_program``; the optimization itself has
    already happened by the time this function is called.
    """
    prompts = optimized_program()

    # (heading, key) pairs, printed in this order.
    sections = (
        ("\n=== Optimized System Prompt ===", "system_prompt"),
        ("\n=== Optimized Instance Prompt Template ===", "instance_prompt"),
    )
    for heading, key in sections:
        print(heading)
        print(prompts[key])

    # TODO: wire these prompts into the SWE-agent run, e.g. via
    # run_experiment(system_prompt, instance_prompt, exp_num), and report the
    # resulting success rate.

if __name__ == "__main__":
    main()
