from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from pydantic import BaseModel,Field
from typing import Optional, List, Union, Dict, Any
import json
import os
import time
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain_core.output_parsers import PydanticOutputParser
import re

# Structured answer expected back from the LLM for a single tool: the
# commands to prepare, install and run the tool, plus the files involved.
#
# NOTE: this model is documented with comments rather than a class docstring
# on purpose. pydantic copies a model's docstring into the JSON schema
# ("description"), and that schema is what the output parser embeds in the
# format instructions, so a docstring here would change the text sent to the
# LLM. The same goes for every `description=` string below.
class CommandlineTask(BaseModel):
    # Which tool the commands below belong to.
    tool_name: str = Field(
        description="The name of the tool",
    )

    # Shell commands, in the order the prompt describes them:
    # prepare the output directory, install/activate the tool, then run it.
    setup_command: str = Field(
        description="The command to create the output directory",
    )
    installation_command: str = Field(
        description="The command to install the tool environment.",
    )
    execution_command: str = Field(
        description="The command to execute the tool",
    )

    # Files consumed and produced by the execution command.
    input_files: List[str] = Field(
        description="The list of input files from Available input files",
    )
    output_files: List[str] = Field(
        description="The list of output files",
    )

    # Free-form justification; the keys are not constrained by the schema.
    explanation: Dict[str, Any] = Field(
        description="The explanation why the command is used",
    )


# System-prompt template for the command-line generation agent.
#
# The text is handed to ChatPromptTemplate, so every `{name}` below is a
# template variable filled in at invoke time: tool_name, tool_description,
# workflow_task, executed_tools_list, available_input_files,
# expected_outputs_info, existing_files, command_line_list,
# tool_command_reference and format_instructions. Do not add literal braces
# to this text without escaping them, or they will be parsed as variables.
#
# The closing triple quote at the end is required: without it the rest of the
# module is swallowed into this string and the file does not parse.
prompt = '''
You are a bioinformatics workflow assistant.

Your task is to generate the necessary setup and execution details for running the CURRENT bioinformatics tool within an existing workflow.

Context:
- Tool name: {tool_name}
- Tool description: {tool_description}
- Workflow task: {workflow_task}
- Existing executed tools list: {executed_tools_list}
- Available input files: {available_input_files}
- Expected output formats of the tool: {expected_outputs_info}
- The files already exist in the working directory: {existing_files}
- The command lines used in other workflow steps: {command_line_list}
- The command line you can reference: {tool_command_reference}

Rules:
1. Input file selection:
   - Select input file(s) ONLY from {available_input_files}.
   - Ensure input type strictly matches the tool’s required input format (e.g., FASTA, TSV, BAM).
   - Do not fabricate or assume non-listed input files.

2. Output file naming & directory:
   - All outputs must be stored under: ./output/{tool_name}/
   - Output filenames must:
       a. Preserve the sample ID from the input filename.
       b. Append the tool name and step role (e.g., "_{tool_name}_classified", "_{tool_name}_metrics").
       c. Use extensions consistent with {expected_outputs_info}.
   - Do not overwrite files from previous steps.

3. Setup command:(The command to create the output directory)
   - Create the output directory if it does not exist(Not in the folder to which these files {existing_files} belong to):
       `"setup_command": "mkdir -p ./output/{tool_name}/"`
    - If the output directory already exists, use it directly without recreating. 
        `"setup_command": "cd ./output/{tool_name}/"`

4. Installation command:(The command to install the tool in a new environment)
   - If the tool is not in {executed_tools_list}, create a new conda environment and install it in this environment:
        if the tool needs python:
       `"installation_command": "conda create -n {tool_name} -c conda-forge -c bioconda {tool_name} python=3.11 -y && conda activate {tool_name}"`
       if the tool does not need python:
       `"installation_command": "conda create -n {tool_name} -c conda-forge -c bioconda {tool_name} -y && conda activate {tool_name}"`
   - If the tool is already installed, skip the installation step, directly activate the environment:
        `"installation_command": "conda activate {tool_name}"`
   - If you think conda is not available, try pip:
       `"installation_command": "pip install {tool_name}[all]"`
   - If you think pip is not available, try apt-get:
       `"installation_command": "apt-get install {tool_name}"`
   - Ensure all required dependencies are included.

5. Execution command (The command to execute the tool)
    - Construct the command specifically for {tool_name}. The core task of this tool is {tool_description}.
    - Use absolute paths for all input and output files. Do not create directories or symbolic links—assume all inputs already exist and output paths are ready.
    - Select input files only from {available_input_files}.
    - The output files' format should follow {expected_outputs_info}.
    - Ensure that all input files actually exist before running the command.
    - Ensure every environment variable is set before running the command.
    - Name the output files based on {tool_name} and {expected_outputs_info}, preserving the input sample ID in each output filename. Ensure filenames do not conflict with {existing_files} or other outputs.
            Example: Input file: sampleA.fasta → Tool: gtdbtk → Expected output: taxonomy classification table → Output filename: sampleA.gtdbtk.classification.tsv.
    - You can refer to the command line {tool_command_reference}, but do not strictly follow it. 
    Focus only on generating **the actual execution command that runs the tool on the inputs and produces the outputs**.

6. Explanation:
   - Explicitly mention any corrections made compared to {command_line_list}.

7. System environment:
   - Assume Linux OS with CUDA 12.6 available.
   - Ensure installation and execution commands are compatible with this environment.
   - If GPU acceleration is possible, prefer GPU-enabled execution.

Output format:
You MUST output in this strict JSON structure:{format_instructions}
'''

class CommandlineAgent:
    """Generate setup/installation/execution commands for one tool via an LLM.

    Holds three pieces that `generate` pipes together: the chat prompt built
    from the module-level ``prompt`` template, a ``ChatOpenAI`` model, and a
    ``PydanticOutputParser`` targeting ``CommandlineTask``.
    """

    def __init__(
        self,
        temperature: float = 0.5,
        *,
        base_url: str = "",
        api_key: str = "",
        model: str = "",
    ):
        """Build the parser, the prompt and the chat model.

        Args:
            temperature: Sampling temperature forwarded to ``ChatOpenAI``.
            base_url: Endpoint forwarded to ``ChatOpenAI``.
            api_key: Credential forwarded to ``ChatOpenAI``.
            model: Model identifier forwarded to ``ChatOpenAI``.

        The three keyword-only settings default to the empty strings that
        were previously hard-coded, so ``CommandlineAgent()`` behaves as
        before. NOTE(review): how ``ChatOpenAI`` treats empty strings
        (e.g. falling back to environment variables) is not visible here -
        confirm against the installed langchain_openai version.
        """
        self.parser = PydanticOutputParser(pydantic_object=CommandlineTask)
        # The whole template is sent as a single system message.
        self.prompt = ChatPromptTemplate.from_messages([
            ("system", prompt)
        ])
        # ChatOpenAI comes from the module-level import; no local re-import.
        self.llm = ChatOpenAI(
            base_url=base_url,
            api_key=api_key,
            model=model,
            temperature=temperature,
        )

    def generate(self, tool_name, tool_description, tool_command_reference, workflow_task, executed_tools_list, available_input_files, expected_outputs_info, command_line_list, existing_files) -> Optional["CommandlineTask"]:
        """Run prompt -> model -> parser for one tool and return the result.

        Each argument is substituted into the prompt placeholder of the same
        name; ``format_instructions`` is supplied by the parser.

        Returns:
            Whatever the parser produces for the model's reply, or ``None``
            if anything in the invocation raises. This is deliberately
            best-effort: the error is printed rather than propagated, and
            callers are expected to check for ``None``.
        """
        chain = self.prompt | self.llm | self.parser
        try:
            template_values = {
                "tool_name": tool_name,
                "tool_description": tool_description,
                "tool_command_reference": tool_command_reference,
                "workflow_task": workflow_task,
                "existing_files": existing_files,
                "executed_tools_list": executed_tools_list,
                "available_input_files": available_input_files,
                "expected_outputs_info": expected_outputs_info,
                "command_line_list": command_line_list,
                "format_instructions": self.parser.get_format_instructions(),
            }
            return chain.invoke(template_values)
        except Exception as e:
            # Covers model/transport errors as well as replies the parser
            # cannot turn into the expected structure.
            print("Error:", e)
            return None

def generate_commandline(tool_name, tool_description, tool_command_reference, workflow_task, executed_tools_list, available_input_files, expected_outputs_info,command_line_list,existing_files):
    """Generate the command-line plan for one tool and return it as a dict.

    A fresh ``CommandlineAgent`` with default settings is created on every
    call and asked to ``generate`` with the given arguments.

    Returns:
        ``model_dump()`` of the agent's result, or ``None`` when the agent
        returned a falsy result (it returns ``None`` on failure).
    """
    task = CommandlineAgent().generate(
        tool_name=tool_name,
        tool_description=tool_description,
        tool_command_reference=tool_command_reference,
        workflow_task=workflow_task,
        executed_tools_list=executed_tools_list,
        available_input_files=available_input_files,
        expected_outputs_info=expected_outputs_info,
        command_line_list=command_line_list,
        existing_files=existing_files,
    )
    return task.model_dump() if task else None