#!/usr/bin/env python3
"""
Script to generate custom tool code based on SWE-agent trajectory analysis.
Uses the generate_tool_code_prompt.txt to analyze trajectory conversations
and generate tools that would have helped the agent perform better.
"""

import argparse
from pathlib import Path
import openai
import sys
sys.path.append("/root/SWE-agent/scripts")

from trajectory_extract.processor import process_trajectory, format_conversation

import re
import os

def parse_tool_output(raw_text: str) -> dict:
    """
    Parses the multi-part tool generation output to extract the YAML,
    source code, and install script.

    Each part is looked up in its own fenced block, identified by the
    language tag on the opening fence (yaml, python, bash). Only the first
    block of each kind is used, and its content is stripped of surrounding
    whitespace.

    Args:
        raw_text: The complete string containing the tool output. An empty
            or None value is accepted and yields empty parts.

    Returns:
        A dictionary with keys 'yaml', 'code', and 'install_script'.
        Returns an empty string for any part that is not found.
    """
    # Result key -> language tag expected on the opening fence.
    fence_tags = {"yaml": "yaml", "code": "python", "install_script": "bash"}

    # A missing reply must not crash re.search; report every part as absent.
    if not raw_text:
        return {key: "" for key in fence_tags}

    parsed = {}
    for key, tag in fence_tags.items():
        # re.DOTALL lets '.' span newlines; the non-greedy '.*?' stops at the
        # first closing fence. Trailing blanks after the tag and CRLF line
        # endings are tolerated so such a block is not silently dropped.
        pattern = rf"```{tag}[ \t]*\r?\n(.*?)\r?\n```"
        match = re.search(pattern, raw_text, re.DOTALL)
        parsed[key] = match.group(1).strip() if match else ""
    return parsed


def generate_tool_code(trajectory_summary: str, prompt_content: str, client: openai.Client, model: str) -> str:
    """
    Ask the model to generate tool code for one trajectory.

    The ``{{TRAJECTORY_SUMMARY}}`` placeholder in ``prompt_content`` is
    replaced with ``trajectory_summary`` and the result is sent as a single
    user message.

    Args:
        trajectory_summary: Text inserted in place of the placeholder.
        prompt_content: Prompt template containing the placeholder.
        client: Client whose ``chat.completions.create`` is called.
        model: Model name forwarded to the completion request.

    Returns:
        The message content of the first choice in the response.
    """
    filled_prompt = prompt_content.replace("{{TRAJECTORY_SUMMARY}}", trajectory_summary)
    print(f"length of full_prompt: {len(filled_prompt)}")

    # timeout=None is passed on purpose: no timeout is set for this request.
    request_options = {
        "model": model,
        "messages": [{"role": "user", "content": filled_prompt}],
        "temperature": 0.8,
        "max_tokens": 4096,
        "timeout": None,
    }
    completion = client.chat.completions.create(**request_options)

    first_choice = completion.choices[0]
    return first_choice.message.content


def save_diy_tool(parsed_code: dict, tools_root: Path = Path("/root/SWE-agent/tools")) -> None:
    """
    Save the parsed_code as a diy tool following the tools/edit_anthropic pattern.

    Files written under ``tools_root`` (each only when its part is non-empty)::

        <tool_dir>/config.yaml       from 'yaml'
        <tool_dir>/install.sh        from 'install_script', mode 0o755
        <tool_dir>/bin/<tool_name>   from 'code', mode 0o755

    ``<tool_dir>`` is ``diy_<tool_name>``, or plain ``diy_tool`` when no tool
    name could be extracted from the YAML. The directories are created even
    when every part is empty.

    Args:
        parsed_code: Dictionary containing 'yaml', 'code', and 'install_script'
            keys. Missing keys are treated as empty parts.
        tools_root: Directory in which the tool directory is created.

    Raises:
        ValueError: If the tool name taken from the YAML is not a plain file
            name (it would place files outside the tool directory).
    """
    # Read every part the same way so a missing key never raises KeyError.
    yaml_text = parsed_code.get("yaml", "")
    install_text = parsed_code.get("install_script", "")
    code_text = parsed_code.get("code", "")

    # Extract tool name from YAML config
    tool_name = extract_tool_name_from_yaml(yaml_text)

    # The name originates from model-generated text and is used as a path
    # component below, so refuse anything that could escape the tool directory.
    if tool_name in (".", "..") or "/" in tool_name or "\\" in tool_name:
        raise ValueError(f"Unsafe tool name in generated YAML: {tool_name!r}")

    # Construct tool directory with diy_ prefix (the fallback name already has it)
    tool_dir_name = "diy_tool" if tool_name == "diy_tool" else f"diy_{tool_name}"
    tool_dir = Path(tools_root) / tool_dir_name
    bin_dir = tool_dir / "bin"

    # Ensure directories exist (parents=True also creates tool_dir)
    bin_dir.mkdir(parents=True, exist_ok=True)

    # Files are written as UTF-8 explicitly: generated text may contain
    # non-ASCII characters and the locale default encoding may not handle them.

    # Save config.yaml
    if yaml_text:
        config_path = tool_dir / "config.yaml"
        config_path.write_text(yaml_text, encoding="utf-8")
        print(f"Saved config.yaml to {config_path}")

    # Save install.sh
    if install_text:
        install_path = tool_dir / "install.sh"
        install_path.write_text(install_text, encoding="utf-8")
        # Make install script executable
        install_path.chmod(0o755)
        print(f"Saved install.sh to {install_path}")

    # Save the main tool executable
    if code_text:
        tool_executable_path = bin_dir / tool_name
        tool_executable_path.write_text(code_text, encoding="utf-8")
        # Make tool executable
        tool_executable_path.chmod(0o755)
        print(f"Saved tool executable to {tool_executable_path}")

    print(f"Successfully created diy tool '{tool_name}' in {tool_dir}")


def extract_tool_name_from_yaml(yaml_content: str) -> str:
    """
    Extract the tool name from YAML configuration.

    This is a line-based scan, not a YAML parse: it looks for a line that is
    exactly ``tools:`` (ignoring surrounding whitespace) and returns the first
    indented ``key:`` that follows it. Blank lines and comment lines are
    skipped, list items (``- ...``) are ignored, and surrounding quotes are
    removed from the key. The scan stops at the next non-indented line, since
    that starts a different top-level section.

    Args:
        yaml_content: The YAML content as a string

    Returns:
        The tool name found in the YAML, or 'diy_tool' as default
    """
    default_name = "diy_tool"
    if not yaml_content:
        return default_name

    in_tools_section = False

    for line in yaml_content.splitlines():
        stripped_line = line.strip()

        # Check if we're entering the tools section
        if stripped_line == "tools:":
            in_tools_section = True
            continue

        # Nothing to do before the section starts, or on blank/comment lines.
        if not in_tools_section or not stripped_line or stripped_line.startswith("#"):
            continue

        # A non-indented line is the next top-level section. This must be
        # checked before extracting a name, otherwise a top-level key that
        # follows an empty tools section would be returned as the tool name.
        if not line.startswith((" ", "\t")):
            break

        # Skip list items and lines that are not "key: ..." entries.
        if stripped_line.startswith("-") or ":" not in stripped_line:
            continue

        # The tool name is everything before the first colon, without quotes.
        tool_name = stripped_line.partition(":")[0].strip().strip("\"'")
        if tool_name:
            return tool_name

    return default_name


def main() -> int:
    """
    Command-line entry point: generate a diy tool from one trajectory.

    Steps: read the prompt template, process and format the trajectory, send
    the filled prompt to the model, optionally write the raw model reply to
    ``--output``, then parse the reply and save it as a diy tool.

    Returns:
        0 on success; 1 if the prompt or trajectory file is missing or the
        model returned no content.
    """
    parser = argparse.ArgumentParser(description="Generate custom tool code from SWE-agent trajectory")
    parser.add_argument("--trajectory", type=Path,
                        default=Path("tool_gen/v3_output_agent_without_str/facebookresearch__hydra-1551/facebookresearch__hydra-1551.traj"),
                        help="Path to trajectory file")
    parser.add_argument("--prompt", type=str,
                        default="tool_gen/prompts/generate_tool_code_prompt.txt",
                        help="Path to prompt file")
    parser.add_argument("--host", type=str, default="34.66.144.59",
                        help="OpenAI API host")
    parser.add_argument("--port", type=int, default=8001,
                        help="OpenAI API port")
    parser.add_argument("--model", type=str, default="gpt-4",
                        help="Model to use for generation")
    parser.add_argument("--output", type=str,
                        help="Output file for generated tool code")

    args = parser.parse_args()

    # Initialize OpenAI client
    client = openai.Client(base_url=f"http://{args.host}:{args.port}/v1", api_key="None")

    # Read the prompt
    prompt_path = Path(args.prompt)
    if not prompt_path.exists():
        print(f"Error: Prompt file not found: {prompt_path}", file=sys.stderr)
        return 1

    prompt_content = prompt_path.read_text(encoding="utf-8")

    # Read and process the trajectory data
    trajectory_path = Path(args.trajectory)
    if not trajectory_path.exists():
        print(f"Error: Trajectory file not found: {trajectory_path}", file=sys.stderr)
        return 1

    # NOTE(review): process_trajectory is used below as a dict-like object with
    # a 'total_messages' entry and optional truncation metadata - confirm
    # against trajectory_extract.processor.
    processed_trajectory = process_trajectory(trajectory_path)
    trajectory_summary = format_conversation(processed_trajectory)

    print("Analyzing trajectory and generating tool code...")
    print(f"Trajectory: {trajectory_path}")
    print(f"Total messages: {processed_trajectory['total_messages']}")
    if processed_trajectory.get('truncated', False):
        original_total = processed_trajectory.get('original_total_messages', 0)
        max_turns = processed_trajectory.get('max_turns_limit', 50)
        print(f"[TRUNCATED: Showing first {max_turns} of {original_total} original messages]")
    print("-" * 80)

    # Generate tool code
    generated_code = generate_tool_code(trajectory_summary, prompt_content, client, args.model)
    if not generated_code:
        # Without any text there is nothing to parse or save.
        print("Error: Model returned no content", file=sys.stderr)
        return 1

    # Honor --output: keep the raw model reply so it can be inspected later.
    if args.output:
        output_path = Path(args.output)
        output_path.parent.mkdir(parents=True, exist_ok=True)
        output_path.write_text(generated_code, encoding="utf-8")
        print(f"Saved generated tool code to {output_path}")

    parsed_code = parse_tool_output(generated_code)

    # Save parsed_code as a diy tool following tools/edit_anthropic pattern
    save_diy_tool(parsed_code)

    return 0


if __name__ == "__main__":
    # Use sys.exit rather than the bare exit() helper: exit() is added by the
    # site module for interactive use and is not guaranteed to exist.
    sys.exit(main())
