import os
from openai import AzureOpenAI
import json
import time

# Instruction prompt for the FineRadScore grader. It asks the model to return a
# JSON object keyed by line id, where each value carries the corrected line, a
# clinical-severity grade (one of four categories) and one or more of six error
# categories. It is sent as the system message in the few-shot request and is
# prepended to the user message in the zero-shot request.
# NOTE: every character inside the literal (including the indentation) is part
# of the text sent to the model, so the literal is deliberately left untouched.
PROMPT_LONG = """Instructions: Return only a json object for this radiology report, with a key-value pair for every line.
	        Each line starts with a numerical id. The key will be the id. The value will be another JSON object. 
            Our goal is to judge prioritizing clinically actionable and urgent findings. We are looking to determine the accuracy of 
            report impressions generated by models or reports from human radiologists.

            For each line, there are two steps:
                1. Correcting the error or inserting the omitted finding: 
                    -To make a change to an existing line, copy and paste the text and make your changes. 
                    -To add a new line, simply insert a row as necessary. 
                    -To entirely delete a line, please enter [delete].
                2. Grading the clinical severity of the error. There are four categories:
                For determining the error urgency, please take into consideration that the patients were seen in an ICU setting.
                    - Emergent error: Findings that suggest a need for immediate intervention, with significant impact on patient’s health
                    Examples: missed or incorrectly called tension pneumothorax, pneumoperitoneum, significantly malpositioned endotracheal tube
                    - Urgent error: Findings where failure to act may adversely affect patient health, that require urgent (but not immediate attention) 
                    and that if not acted on, may worsen over time and likely result in an adverse patient outcome
                    Examples: missed pneumonia, mildly malpositioned endotracheal tube, incorrect anatomy for pleural effusion
                    - Actionable nonurgent error: Findings that likely do not require action in the short term but if not acted upon may reasonably 
                    impact the patient’s health.
                    Examples: pulmonary nodule which requires follow up, presence of emphysematous changes
                    -Not actionable: Findings that reasonably would not likely have an impact on the patient’s management. Errors that contain a reference to a nonexistent comparison but otherwise contain the correct information are in this category.
                    Examples: chronic appearing rib fracture, describing a consolidation as stable when there is no comparison, description of pulmonary edema as asymmetric rather than bilateral (as this would not change management)
                
            Only include semantically relevant errors. For example, if the report states "no pleural effusion" and the model generates "no pleural effusion is seen", this is not an error.
            However, incorrect sizes or locations of findings are considered errors. Incorrect findings should be removed and omitted findings should be added.

            Please label each error with one or more of the following 6 error categories:
                1. False prediction of finding
                2. Omission of finding
                3. Incorrect location/position of finding
                4. Incorrect severity of finding
                5. Mention of comparison that is not present in the reference impression
                6. Omission of comparison describing a change from a previous study
            
            Please format your output as a JSON object as shown below.
        """

# Output-format appendix that the zero-shot request appends to PROMPT_LONG.
# The example object must itself be strictly valid JSON: the model's reply is
# parsed with json.loads, which rejects trailing commas, so the example must not
# end its last entry with one (a model that mimics it would produce output that
# fails to parse).
JSON_formatting = """
    Results should be a JSON object in the format of: 
    {
        "1": {
            "corrections": "Corrected sentence 1 from report.",
            "clinical severity": "severity of error",
            "comments": "any comments regarding the correction",
            "error category": ["list of error categories"]
        },  
        "5": {
            "corrections": "Corrected sentence 5 from report.",
            "clinical severity": "severity of error",
            "comments": "any comments regarding the correction",
            "error category": ["list of error categories"]
        }, 
        "None": {
            "corrections": "Insertion of new sentence for report.",
            "clinical severity": "severity of error",
            "comments": "any comments regarding the correction",
            "error category": ["list of error categories"]
        }
    }
"""

# Reference answers for the few-shot conversation in generate_gpt4_response.
# Each dict is serialised with json.dumps and sent as the assistant turn that
# follows the matching example "Generated Text / Ground Truth Text" user turn.
# Keys are the bracketed line ids of the generated text; the key "None" marks a
# sentence that has to be inserted. Key order is preserved by json.dumps, so it
# is part of the prompt and is kept as originally written.

# Example 1: wrong side of consolidation, false cardiomegaly, omitted negative finding.
corrections1 = {
    "0": {
        "corrections": "Right lower lung consolidation, either pneumonia, aspiration, or possibly pulmonary contusions from recent trauma.",
        "clinical severity": "Actionable nonurgent error",
        "comments": "Incorrect anatomy, which although incorrect likely does not result in urgent change in management",
        "error category": ["Incorrect location/position of finding"],
    },
    "2": {
        "corrections": "[delete]",
        "clinical severity": "Actionable nonurgent error",
        "comments": "Cardiomegaly not present, which may result in unecessary work up but likely not urgent in nature",
        "error category": ["False prediction of finding"],
    },
    "None": {
        "corrections": "No evidence of displaced rib fracture or pneumothorax.",
        "clinical severity": "Not actionable",
        "comments": "Given the indication, this was added.",
        "error category": ["Omission of finding"],
    },
}

# Example 2: wrong nodule count, omitted basilar opacity, redundant line removed.
corrections2 = {
    "0": {
        "corrections": "Two left lung nodules concerning for metastatic disease.",
        "clinical severity": "Actionable nonurgent error",
        "comments": "Incorrect number of nodules, may affect longer term follow up",
        "error category": ["Incorrect location/position of finding", "Incorrect severity of finding"],
    },
    "None": {
        "corrections": "Left basilar opacity could represent atelectasis or consolidation.",
        "clinical severity": "Urgent error",
        "comments": "In the context of neutropenic fever, missed possible pneumonia is an urgent finding",
        "error category": ["Omission of finding"],
    },
    "1": {
        # The line is removed, so "[delete]" belongs in "corrections" and the
        # severity grade in "clinical severity" (the two values were swapped).
        "corrections": "[delete]",
        "clinical severity": "Not actionable",
        "comments": "Repetitive",
        "error category": [],
    },
}

# Example 3: reference to a non-existent comparison, incomplete atelectasis
# location, false pulmonary edema.
corrections3 = {
    "0": {
        "corrections": "Endotracheal tube projects approximately 2.2 cm above the carina. ",
        "clinical severity": "Not actionable",
        "comments": "Report references a non existent comparison but this does not affect management",
        "error category": ["Mention of comparison that is not present in the reference impression"],
    },
    "1": {
        "corrections": "Minimal atelectasis at the left and right lung bases.",
        "clinical severity": "Not actionable",
        "comments": "Incorrect anatomy of atelectasis would not affect management",
        "error category": ["Incorrect location/position of finding"],
    },
    "3": {
        "corrections": "[delete]",
        "clinical severity": "Urgent error",
        "comments": "Incorrect call of pulmonary edema is a potentially urgent error if diuretics are started, for example",
        "error category": ["False prediction of finding"],
    },
}

# Example 4: two hallucinated tube findings that must be deleted.
corrections4 = {
    "1": {
        "corrections": "[delete]",
        "clinical severity": "Actionable nonurgent error",
        "comments": "no gastric tube",
        "error category": ["False prediction of finding"],
    },
    "5": {
        "corrections": "[delete]",
        "clinical severity": "Actionable nonurgent error",
        "comments": "no enteric tube",
        "error category": ["False prediction of finding"],
    },
}

# Example 5: generated text is semantically equivalent to the ground truth, so
# the expected answer is an empty object (no errors).
corrections5 = {}

# Module-level Azure OpenAI client shared by the request functions below.
# It is built at import time; the endpoint and API key are looked up in the
# OPENAI_ENDPOINT and OPENAI_API_KEY environment variables (None when unset).
client = AzureOpenAI(
    api_version="2025-01-01-preview",
    azure_endpoint=os.environ.get("OPENAI_ENDPOINT"),
    api_key=os.environ.get("OPENAI_API_KEY"),
)

def estimate_gpt4_cost(
    prompt_tokens: int,
    completion_tokens: int,
    *,
    input_cost: float = 0.03,
    output_cost: float = 0.06,
) -> float:
    """Estimate the cost of one request from its token usage.

    Args:
        prompt_tokens: Number of input (prompt) tokens billed for the request.
        completion_tokens: Number of output (completion) tokens billed.
        input_cost: Price per 1,000 prompt tokens. Defaults to the GPT-4 rate
            this function has always used.
        output_cost: Price per 1,000 completion tokens. Defaults to the GPT-4
            rate this function has always used.

    Returns:
        The estimated cost, in the same currency unit as the two rates.

    NOTE(review): the request functions in this file ask for the "gpt-4o"
    model while the defaults are GPT-4 rates; pass the rates of the deployed
    model explicitly if an accurate figure is needed.
    """
    return (input_cost*prompt_tokens/1000 + output_cost*completion_tokens/1000)


def generate_gpt4_response(pred_target, gt_target):
    """Grade a generated report against its ground truth with the few-shot prompt.

    The request uses PROMPT_LONG as the system message, followed by five worked
    examples (user turn with generated/ground-truth text, assistant turn with
    the JSON-serialised reference answer), followed by the pair to grade.

    Args:
        pred_target: Generated report text. Presumably each sentence is
            prefixed with a bracketed id such as "[0]", as in the examples
            below -- confirm against the caller.
        gt_target: Ground-truth report text.

    Returns:
        A ``(result, cost)`` tuple. ``result`` is the model reply parsed as
        JSON, or ``{"Failed": None}`` when the reply is missing or is not
        valid JSON. ``cost`` is the estimate from ``estimate_gpt4_cost`` for
        the token usage reported by the API.

    Raises:
        Whatever ``client.chat.completions.create`` raises; request errors are
        not handled here.
    """
    # NOTE(review): each example ground-truth text opens a double quote that is
    # never closed (the literal ends at the closing triple quote). The strings
    # are kept exactly as they were so the prompt sent to the model is unchanged.
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": PROMPT_LONG},
            {"role": "user", "content": """Generated Text: [0] ___M with trauma, evaluate for injuries, pneumothorax. [1] Left lower lung platelike atelectasis. [2] Cardiomegaly. \n \n 
                                            Ground Truth Text: "Right lower lung consolidation, either pneumonia, aspiration, or possibly pulmonary contusions from recent trauma. No evidence of displaced rib fracture or pneumothorax."""},
            {"role": "assistant", "content": json.dumps(corrections1)},
            {"role": "user", "content": """Generated Text: [0] Three left lung nodules concerning for metastatic disease. [1] Multiple lung nodules \n \n 
                                            Ground Truth Text: "Two left lung nodules concerning for metastatic disease. Left basilar opacity could represent atelectasis or consolidation."""},
            {"role": "assistant", "content": json.dumps(corrections2)},
            {"role": "user", "content": """Generated Text: [0] Stable position of endotracheal tube projects 2.2 cm above the carina. [1] Minimal atelectasis at the right lung base. [2] Moderate cardiomegaly. [3] Pulmonary edema. [4] The presence of a minimal left pleural effusion cannot be excluded. \n \n 
                                            Ground Truth Text: "Endotracheal tube projects approximately 2.2 cm above the carina. Minimal atelectasis at the left and right lung bases. Moderate cardiomegaly. The presence of a minimal left pleural effusion cannot be excluded."""},
            {"role": "assistant", "content": json.dumps(corrections3)},
            {"role": "user", "content": """Generated Text: [0] Endotracheal tube is in standard position. [1] A nasogastric tube is seen coursing into the stomach with tip in the stomach. [2] Heart size is normal. [3] Lungs are clear. [4] No pleural effusion or pneumothorax. [5] Enteric tube tip is in the stomach. \n \n 
                                            Ground Truth Text: "Endotracheal tube is in standard position. Heart size is normal. Lungs are clear. No pleural effusion or pneumothorax."""},
            {"role": "assistant", "content": json.dumps(corrections4)},
            {"role": "user", "content": """Generated Text: [0] The lungs are well expanded. [1] There is no pleural effusion or pneumothorax. [2] The cardiomediastinal and hilar contours are unremarkable. \n \n 
                                            Ground Truth Text: "The lungs are adequately inflated. The contours of the cardiomediastinal and hilar regions appear normal. There are no indications of pleural effusion or pneumothorax."""},
            {"role": "assistant", "content": json.dumps(corrections5)},

            {"role": "user", "content": f"Generated Text: {pred_target} \n \n Ground Truth Text: {gt_target}"}
        ],
    )

    try:
        result = json.loads(response.choices[0].message.content)
    except (IndexError, TypeError, ValueError):
        # Best-effort parse: an empty ``choices`` list (IndexError), a reply
        # with no text content (TypeError from json.loads(None)) or malformed
        # JSON (json.JSONDecodeError, a ValueError) all map to the sentinel.
        # Unlike a bare ``except``, this lets KeyboardInterrupt/SystemExit
        # propagate.
        result = {"Failed": None}

    cost = estimate_gpt4_cost(response.usage.prompt_tokens, response.usage.completion_tokens)

    return result, cost


def generate_gpt4_response_zeroshot(pred_target, gt_target):
    """Grade a generated report against its ground truth with the zero-shot prompt.

    A single user message is sent, made of PROMPT_LONG, the JSON_formatting
    appendix and the generated/ground-truth pair; no worked examples are
    included.

    Args:
        pred_target: Generated report text (presumably with bracketed line ids
            such as "[0]" -- confirm against the caller).
        gt_target: Ground-truth report text.

    Returns:
        A ``(result, cost)`` tuple. ``result`` is the model reply parsed as
        JSON, or ``{"Failed": None}`` when the reply is missing or is not
        valid JSON. ``cost`` is the estimate from ``estimate_gpt4_cost`` for
        the token usage reported by the API.

    Raises:
        Whatever ``client.chat.completions.create`` raises; request errors are
        not handled here.
    """
    prompt = PROMPT_LONG + JSON_formatting + f"Generated Text: {pred_target} \n \n Ground Truth Text: {gt_target}"
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "user", "content": prompt},
        ],
    )

    try:
        result = json.loads(response.choices[0].message.content)
    except (IndexError, TypeError, ValueError):
        # Best-effort parse: an empty ``choices`` list (IndexError), a reply
        # with no text content (TypeError from json.loads(None)) or malformed
        # JSON (json.JSONDecodeError, a ValueError) all map to the sentinel.
        # Unlike a bare ``except``, this lets KeyboardInterrupt/SystemExit
        # propagate.
        result = {"Failed": None}

    cost = estimate_gpt4_cost(response.usage.prompt_tokens, response.usage.completion_tokens)

    return result, cost