import argparse
import os
import random
import re
import sys
import time
from pathlib import Path

import openai
from openai import APIError
from openai.error import APIConnectionError, Timeout, RateLimitError, ServiceUnavailableError
from tqdm import tqdm

sys.path.insert(0, str(Path(__file__).parent.parent.parent))

from mimic_cxr_utils import *


def parse_args():
    """Parse the command-line options of the data-generation script.

    Returns:
        argparse.Namespace: with attributes ``task``, ``split``, ``data_dir``,
        ``output_path``, ``get_reason``, ``api_type``, ``api_base``,
        ``api_version`` and ``api_key``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--task",
        required=True,
        choices=["correction", "template", "history"],
        help="Which kind of synthetic data to generate.",
    )
    # "all" is accepted because the study loop at the bottom of this file
    # explicitly treats ``split == "all"`` as "do not filter by split".
    parser.add_argument(
        "--split",
        required=True,
        choices=["train", "test", "all"],
        help="Dataset split to process ('all' disables split filtering).",
    )
    parser.add_argument(
        "--data_dir",
        default="",
        help="Root directory holding the MIMIC-CXR csv files and the files/ tree.",
    )
    parser.add_argument(
        "--output_path",
        default=None,
        help="Output directory; defaults to reports_<task>.",
    )
    parser.add_argument("--get_reason", action="store_true")
    parser.add_argument("--api_type", default="azure")
    parser.add_argument("--api_base", default="")
    parser.add_argument("--api_version", default="2023-07-01-preview")
    parser.add_argument(
        "--api_key",
        default=None,
        help="API key; falls back to the OPENAI_API_KEY environment variable.",
    )
    return parser.parse_args()

# ---- Script configuration (executed at module load) ----
args = parse_args()
# No extra validation of ``task`` is needed: argparse ``choices`` already
# restricts it to "correction", "template" or "history".
task = args.task
split = args.split

# Configure the openai client from the command line; the key falls back to
# the OPENAI_API_KEY environment variable when --api_key is not given.
openai.api_type = args.api_type
openai.api_base = args.api_base
openai.api_version = args.api_version
openai.api_key = os.getenv("OPENAI_API_KEY") if args.api_key is None else args.api_key

data_dir = Path(args.data_dir)
out_dir = f"reports_{task}" if args.output_path is None else args.output_path

# makedirs(exist_ok=True) also creates missing parent directories and does
# not fail if the directory appears between a check and the creation.
os.makedirs(out_dir, exist_ok=True)

# Lookup tables built from the MIMIC-CXR csv files by the mimic_cxr_utils helpers.
dicomid2label = create_id2label_dict(data_dir/"mimic-cxr-2.0.0-metadata.csv")
studyid2split = create_id2split_dict(data_dir/"mimic-cxr-2.0.0-split.csv")
studyid2path = create_id2path_dict(data_dir/"mimic-cxr-2.0.0-metadata.csv")
metadata = pd.read_csv(data_dir/"mimic-cxr-2.0.0-metadata.csv")

# Upper bound on generation attempts per study in the main loop.
max_trials = 10
# Deployment/engine name passed to openai.ChatCompletion.create.
engine = "gpt-4-32k"
# engine = "gpt-4"

def query_gpt(messages):
    """Send a chat conversation to the model, retrying on transient API errors.

    Args:
        messages: list of ``{"role": ..., "content": ...}`` dicts forming the
            conversation.

    Returns:
        The response returned by ``openai.ChatCompletion.create``; callers in
        this file read ``["choices"][0]["message"]["content"]`` from it.

    The call is retried indefinitely, sleeping 5 seconds between attempts,
    for the error types listed in ``retryable``. Any other exception
    propagates to the caller.
    """
    # ServiceUnavailableError is imported alongside the other transient
    # errors and is listed explicitly so that a temporary 503 is retried
    # rather than aborting the whole run.
    retryable = (
        APIConnectionError,
        Timeout,
        RateLimitError,
        ServiceUnavailableError,
        APIError,
    )
    while True:
        try:
            return openai.ChatCompletion.create(
                messages=messages,
                engine=engine,
                temperature=0.7,
                max_tokens=800,
                top_p=0.95,
                frequency_penalty=0,
                presence_penalty=0,
                stop=None,
                request_timeout=20,
            )
        except retryable as e:
            print(e)
            time.sleep(5)


# Structured report templates (t1..t10) used by get_template_data, which
# picks one at random and asks the model to rewrite a free-text report in it.
# The position in this list matters: get_template_data prompts with
# "fill in the blanks" for indices 0 and 1 (t1, t2) and with
# "select right items and add necessary details" for all the others.
# The template text is sent to the model verbatim, so whitespace inside the
# strings is significant.
# NOTE(review): "Osseours Structures" (t4) and "Costphrenic" (t5) look like
# typos; they are left untouched here because they are part of the prompt text.
templates = [
"""REPORT: [Imaging Protocol]
COMPARISON: [None/Compare with former image]
	
FINDINGS:
	Lines/tubes:

	Lungs: 

	Pleura:

	Heart and mediastinum:

	Bones:

	Support Devices:
""", # t1
"""Findings:

Bronchovascular markings:
Rest of the visualised lung fields:
Bilateral hilum:
Cardiac silhouette:
Costophrenic angles:
Visualised bones & soft tissues:
Support Devices:
""", # t2
"""Comparison:
- [ ]None.
- [ ]Compare to historical Report.

Findings:
 Lungs:
 - [ ]The lungs are clear.
 - [ ]The inspiratory volumes are small, and this probably accounts for some vascular crowding and atelectasis at the bases.
 - [ ]There is focal opacity at the right lung base most likely representing right lower lobe atelectasis.
 - [ ]There is focal opacity at the right lung base most likely representing a combination of a moderate right pleural effusion and associated passive right lower lobe atelectasis.
 - [ ]There is a focal opacity at the left lung base most like representing left lower lobe atelectasis.
 - [ ]There is a focal opacity at the left lung base, characteristic of a combination of a moderate left pleural effusion and associated atelectasis.
 - [ ]There is patchy opacity at both lung bases characteristic of atelectasis.
 - [ ]There is patchy opacity at both lung bases characteristic of a combination of atelectasis and effusions.
 - [ ]There is vascular congestion with increased interstitial markings findings indicating mild cardiogenic edema.
 - [ ]There is vascular congestion with mixed interstitial and patchy alveolar opacities, findings indicating moderate cardiogenic edema.
 - [ ]There is extensive alveolar consolidation in the lungs bilaterally, most likely representing pulmonary edema. This is probably on the basis of severe congestive heart failure but could be a result of noncardiogenic causes.
 - [ ]There are multiple patchy areas of consolidation, widely scattered about the lungs bilaterally. This most likely represents a multifocal pneumonia.
 - [ ]There is patchy opacity at both lung bases characteristic of a combination of atelectasis and effusions. There is mild vascular and interstitial prominence, likely reflecting mild pulmonary edema.
 - [ ]There is mild pulmonary vascular engorgement without pulmonary edema.

 Heart:
 - [ ]The heart is normal in size. 
 - [ ]There is mild cardiomegaly.
 - [ ]There is moderate cardiomegaly.
 - [ ]There is severe cardiomegaly. 
 - [ ]There is marked cardiomegaly.
 - [ ]The heart is top normal in size. 

 Mediastinum:
 - [ ]The mediastinum is within normal limits.
 - [ ]Atherosclerotic calcifications are seen in the aorta.
 - [ ]The aorta appears tortuous, a finding usually associated with either atherosclerosis or systemic hypertension.
 - [ ]The aortic contour is quite prominent, a finding likely indicating either an aortic aneurysm or dissection.
 - [ ]Post-operative changes are present in the mediastinum.
 - [ ]Degenerative changes are present in the thoracic spine.

 Support Devices:
 - [ ]None
 - [ ]pacemaker
 - [ ]PICC
 - [ ]tube
 - [ ]catheter
 - [ ]other
""", # t3
"""Comparison:
Comparison Study: 
- [ ]None.
- [ ]Compare to historical Report.

Findings:
Lungs:
 -[ ] The lungs are clear.
 -[ ] Subsegmental atelectasis is present at both bases.
 -[ ] Bibasilar opacities represent small bilateral pleural effusions with overlying atelectasis.
 -[ ] Mild pulmonary vascular congestion is present. There is no evidence of associated pulmonary edema.
 -[ ] Mild diffuse interstitial pulmonary edema is present, likely cardiogenic.
 -[ ] Moderate alveolar pulmonary edema is present, likely cardiogenic.
 -[ ] Marked diffuse pulmonary edema and consolidation are present.
 -[ ] Subsegmental atelectasis is present at the left base.
 -[ ] Subsegmental atelectasis is present at the right base.
 -[ ] An opacity at the left base represents a small pleural effusion with overlying atelectasis.
 -[ ] An opacity at the right base represents a small pleural effusion with overlying atelectasis.
 -[ ] Small bilateral pleural effusions are present with overlying atelectasis. Mild cardiogenic interstitial edema also is present.
 -[ ] The inspiratory volumes are small, which probably explains increased interstitial opacity and atelectasis at the bases.
 -[ ] Other.

Pleural Spaces:
 -[ ] No pleural abnormalities are listed.
 -[ ] Trace bilateral pleural effusions are present.
 -[ ] Small bilateral pleural effusions are present.
 -[ ] Moderate bilateral pleural effusions are present.
 -[ ] Large bilateral pleural effusions are present.
 -[ ] Other.

Heart:
 -[ ] The heart is normal in size.
 -[ ] The heart is mildly enlarged.
 -[ ] The heart is moderately enlarged.
 -[ ] The heart is markedly enlarged.
 -[ ] Other.

Mediastinum:
 -[ ] The mediastinal contours are normal.
 -[ ] The thoracic aorta is tortuous.
 -[ ] Calcifications are present in the thoracic aorta.
 -[ ] The thoracic aorta is tortuous and calcified.
 -[ ] Other.

Osseours Structures:
 -[ ] There are no osseous abnormalities.
 -[ ] Degenerative changes are present in the thoracic spine.
 -[ ] A mild thoracic levoscoliosis is present.
 -[ ] A mild thoracic dextroscoliosis is present.
 -[ ] A mild S-shaped thoracolumbar scoliosis is present.
 -[ ] Other.

Additional Findings: 
 -[ ] None.
 -[ ] Additional Findings:

Support Devices:
 - [ ]None
 - [ ]pacemaker
 - [ ]PICC
 - [ ]tube
 - [ ]catheter
 - [ ]other
""", # t4
"""Modality: X rays
Part: Chest

Findings:
Bony Cage: [Normal/Other findings]
Soft tissue of Chest:[Normal/Other findings]
Trachea:[In Midline/Other findings]
Lungs:[Both Lung fields are equally translucent/Other findings]
Heart:[Cardiac size and contour are normal/Other findings]
Hilum & Mediastinum:[Normal/Other findings]
Costphrenic and Cardiophrenic angles:[Clear/Other findings]
Support Devices:[None/Findings]
Other:[Nil/Other findings]
""", # t5
"""COMPARISON: [None./Comparison]

FINDINGS:

Lungs/pleura: [Normal./Other findings]

Heart/mediastinum: [Normal./Other findings]

Bones/Soft tissues: [Normal./Other findings]

Support Devices: [None./Other findings]
""", # t6
"""Findings:

    Lungs:
        Parenchyma: [Clear | Infiltrates | Consolidation | Nodules]
        Pleura: [Normal | Thickening | Effusion]
        Interstitial Markings: [Normal | Increased]

    Heart:
        Size: [Normal | Enlarged]
        Contours: [Normal | Abnormal]

    Mediastinum:
        Width: [Normal | Wide]
        Contour: [Normal | Abnormal]

    Bones:
        Ribs: [Normal | Fracture | Lesion]
        Spine: [Normal | Degenerative changes | Fracture | Lesion]
        Clavicles: [Normal | Fracture | Lesion]

    Diaphragm:
        Position: [Normal | Elevated]
        Contour: [Normal | Abnormal]

    Soft Tissues: [Normal | Abnormal]

    Support Devices: [None | pacemaker | PICC | tube | catheter | other]
""", # t7
"""Findings:
    Heart: [Normal size and contour | Enlarged | Other]
    Mediastinum: [Normal contour | Widened | Mass | Other]
    Lungs: 
        - Parenchyma: [Clear | Consolidation | Interstitial markings | Nodule(s) | Mass | Other]
        - Effusion: [Absent | Small | Moderate | Large]
            if Effusion is not Absent:
            - Location: [Right | Left | Bilateral]
            - Estimated volume: [<=100 mL | 101-500 mL | 501-1000 mL | >1000 mL]
        - Pneumothorax: [Absent | Present]
            - Size: [<# cm at apex | # cm]
  
    Bones: [Normal | Fracture(s) | Lytic lesions | Other abnormalities]
    Soft Tissues: [Normal | Swelling | Mass | Air | Other abnormalities]
    Diaphragm: [Well-defined | Elevated | Blurred | Irregular | Other]
    Pleura: [Normal | Thickening | Plaque | Calcification | Other]
    Support Devices: [None | pacemaker | PICC | tube | catheter | other]
    Other findings: [Provide details if any other abnormalities are noted]
""", # t8
"""Findings:

Heart:
- Size: [Normal | Enlarged]
- Contour: [Normal | Abnormal]

Lungs:
- Lung Fields: [Clear | Consolidation | Infiltrates | Pleural Effusion]
- Nodules/Masses: [None | Single | Multiple]
    - If applicable, provide details:
        - Location: [Right Upper Lobe; Right Middle Lobe; Right Lower Lobe; Left Upper Lobe; Left Lower Lobe]
        - Size: [# cm]
        - Characteristics: [Smooth; Spiculated; Calcified]

Pleura:
- Pleural Lines: [Normal | Thickened]
- Pleural Effusion: [None | Right | Left | Bilateral]

Mediastinum:
- Mediastinal Width: [Normal | Enlarged]
- Mediastinal Masses: [No | Yes]
    - If applicable, provide details:
        - Location: [Anterior; Middle; Posterior]
        - Size: [# cm]
        - Characteristics: [Smooth; Irregular]

Bones and Soft Tissues:
- Ribs: [Normal | Fracture | Lesions]
- Spine: [Normal | Degenerative Changes | Fracture | Lesions]
- Soft Tissues: [Normal | Abnormal]

Support Devices: [None | pacemaker | PICC | tube | catheter | other]
""", # t9
"""Findings:  
  
Lungs:  
- Parenchyma: [Clear | Consolidation | Interstitial markings | Other: please specify]  
- Nodules/Masses: [Absent | Present]  
    {If present, complete the following:}  
    - Number of Nodules/Masses: [#]  
    - Size of the largest Nodule/Mass: [# mm | # cm]  
    - Location: [Right Upper Lobe | Right Middle Lobe | Right Lower Lobe | Left Upper Lobe | Left Lower Lobe; specify segment if known]  
    - Characteristics: [Well-defined | Spiculated | Cavitary | Calcified | Other: [specify]]  
    - Density: [Solid | Ground glass | Mixed | Other: [specify]]  
  
Cardiomediastinal Contour:  
- Heart Size: [Normal | Enlarged]  
- Mediastinal Shape: [Normal | Widened | Other: please specify]  
  
Pleura:  
- Pleural Effusion: [Absent | Present]  
    {If present, specify side and approximate volume if possible}  
- Pleural Thickening: [Absent | Present]  
    {If present, describe extent and location}  
  
Bones:  
- Ribs/Spine/Clavicles/Scapulae: [Normal | Fracture(s) | Lesion(s) | Other: [specify]]  
  
Diaphragm and Abdomen:  
- Diaphragm: [Normal contour | Elevated hemidiaphragm | Other: please specify]  
- Abdominal Component: [Not visible | Gas under diaphragm | Other: please specify]  
  
Soft Tissues and Other Observations:  
- Soft Tissue: [Normal | Abnormality noted: please specify]  
- Foreign Bodies: [Absent | Present: please specify location and appearance]  
- Additional Findings: [None | Specify: please specify]  

Support Devices: [None | pacemaker | PICC | tube | catheter | other]
""" # t10
]

def get_correction_data(gt, out_file):
    """Generate one "report correction" sample from a ground-truth findings text.

    Three chat calls are made through ``query_gpt``:
      1. rewrite ``gt`` into a report containing a few diagnostic errors,
      2. produce short instructions for turning that report back into ``gt``,
      3. ask the model to verify ("yes"/"no") that the instructions work.

    Args:
        gt: ground-truth findings section of a report.
        out_file: path of the text file the sample is written to.

    Returns:
        bool: False if the verification answer contains the word "no" (nothing
        is written in that case), True once the sample has been written to
        ``out_file`` and printed.
    """
    messages = [{
        "role": "system",
        "content": "You are a radiologist and will intentionally make some diagnosis errors of a chest x-ray radiology report. You will be given a correct findings section of chest x-ray report, and you will be asked to rewrite the report with a few, around 2-4, diagnostic errors. Provide the rewritten wrong report only."
    }, {
        "role": "user",
        "content": "Normal lung parenchyma without consolidation or mass lesions. Centrilobular emphysema in upper lobes with small nodular opacities (4-6 mm). No lymphadenopathy or pleural effusion. Unremarkable cardiac and upper abdominal structures. Impression: Normal lung parenchyma. Centrilobular emphysema in upper lobes with small nodular opacities. No lymphadenopathy or pleural effusion. Unremarkable cardiac and upper abdominal structures."
    }, {
        "role": "assistant",
        "content": "Normal lung parenchyma without consolidation or mass lesions. No emphysema. Pleural effusion in lower left lobe. Unremarkable cardiac and upper abdominal structures. Impression: Normal lung parenchyma. Lower left pleural effusion. Unremarkable cardiac and upper abdominal structures."
    }, {
        "role": "user",
        "content": gt
    }]
    incorrect_report = query_gpt(messages)["choices"][0]["message"]["content"]

    # The instruction style ("should"/"could" be vague) is randomized per sample.
    messages = [{
        "role": "system",
        "content": f"You are a radiologist who helps check the correctness of chest x-ray radiology report. Given a correct report and an incorrect report, provide 2 or 3 short and concise pointers on how to fix the incorrect report into the correct report. Do not be verbose or verbatim. List the instructions or suggestions only. The doctor has access to the radiograph, therefore your instructions {random.choice(['should','could'])} be vague or only state what the problems are without actual instruction to fix them."
    }, {
        "role": "user",
        "content": f"Incorrect report: {incorrect_report}\nCorrect report: {gt}"
    }]
    instructions = query_gpt(messages)["choices"][0]["message"]["content"]

    messages = [{
        "role": "system",
        "content": "You are a radiologist who helps check the correctness of radiology reports. Given a pair of correct and incorrect reports and the instructions to revise the incorrect report, print \"yes\" if the instruction can be used to modify the incorrect report into correct report, otherwise print \"no\". Specifically pay attention to the revision direction so that the instructions do not modify correct report into incorrect ones."
    }, {
        "role": "user",
        "content": f"Incorrect report: {incorrect_report}\nCorrect report: {gt}\nInstructions: {instructions}"
    }]
    verdict = query_gpt(messages)["choices"][0]["message"]["content"]
    # Match "no" as a whole word only: a plain substring test also fires on
    # words such as "diagnosis", "normal" or "not" inside a positive answer.
    if re.search(r"\bno\b", verdict.lower()):
        return False

    out_string = f"INCORRECT REPORT:\n{incorrect_report.strip()}\n\nINSTRUCTIONS:\n{instructions.strip()}\n\nGT:\n{gt.strip()}\n"
    # Context manager closes the file even if the write fails; utf-8 keeps
    # non-ASCII model output writable regardless of the platform default.
    with open(out_file, "w", encoding="utf-8") as f:
        f.write(out_string)
    print(out_string)
    return True

def get_template_data(gt, out_file):
    """Generate one "templated report" sample from a free-text report.

    A template is drawn at random from ``templates`` and the model is asked to
    rewrite ``gt`` in that format. Two "yes"/"no" checks follow (diagnosis
    consistency with ``gt``, then conformance to the template). For each check
    answered with "no", the model is asked to explain and the explanation is
    appended to the "Reason" section of the output.

    Args:
        gt: free-text report.
        out_file: path of the text file the sample is written to.

    Returns:
        bool: always True; the sample is written and printed even when a
        check fails (the failure is only recorded under "Reason").
    """
    template_idx = random.randint(0, len(templates) - 1)
    # The first two templates are free-form blanks; the others offer options.
    if template_idx in [0, 1]:
        instruction = "fill in the blanks"
    else:
        instruction = "select right items and add necessary details"
    template = templates[template_idx]

    messages = [{
        "role": "system",
        "content": f"You are an assistant who helps format radiology reports using structured templates. The template is:\n{template}\nYou will be given a free-text radiology report, and you need to rewrite the report using this template by {instruction}. Leave any section blank if the report doesn't mention related information."
    }, {
        "role": "user",
        "content": gt
    }]
    templated_report = query_gpt(messages)["choices"][0]["message"]["content"]

    # (system prompt, user prompt) for each verification pass, in the order
    # they are sent: diagnosis consistency first, template conformance second.
    checks = [
        (
            "You are a radiologist's assistant who helps check the consistency between a free-text report and a templated report. Output \"yes\" if the diagnosis of the templated report matches the diagnosis in the free-text report on all pathologies, otherwise output \"no\" if there are any errors or inconsistencies.",
            f"Free-text Report:\n{gt}\nTemplate Report:\n{templated_report}",
        ),
        (
            "You are a radiologist's assistant who helps check the consistency between a template and a formatted report. Output \"yes\" if the report follows the format of the given template, otherwise output \"no\" if the report does not follow the structure of the template.",
            f"Formatted Report:\n{templated_report}\nTemplate:\n{template}",
        ),
    ]
    reason = ""
    for system_prompt, user_prompt in checks:
        messages = [{
            "role": "system",
            "content": system_prompt
        }, {
            "role": "user",
            "content": user_prompt
        }]
        verdict = query_gpt(messages)["choices"][0]["message"]["content"]
        # Match "no" as a whole word only: a plain substring test also fires
        # on words such as "diagnosis", "normal" or "not" in a positive answer.
        if re.search(r"\bno\b", verdict.lower()):
            messages.append({
                "role": "assistant",
                "content": "no",
            })
            messages.append({
                "role": "user",
                "content": "Please specify why your answer is no, what is not consistent or incorrect in the given information?"
            })
            reason += query_gpt(messages)["choices"][0]["message"]["content"]

    out_string = f"TEMPLATE:\n{template.strip()}\n\nTEMPLATED REPORT:\n{templated_report.strip()}\n\nGT:\n{gt.strip()}\n\nReason:\n{reason}"
    # Context manager closes the file even if the write fails; utf-8 keeps
    # non-ASCII model output writable regardless of the platform default.
    with open(out_file, "w", encoding="utf-8") as f:
        f.write(out_string)
    print(out_string)
    return True


def get_history_data(report, findings, impression, out_file):
    """Generate one "medical history / lab tests" sample for a report.

    Chat calls made through ``query_gpt``, in order: extract the indication
    section, infer possible medical history, infer relevant lab tests, infer
    plausible results for those tests, then a "yes"/"no" consistency check of
    everything against the report. If the check answers "no", the model is
    asked to explain and the explanation is stored under "Reason".

    Args:
        report: full report text.
        findings: unused in this function; kept for the caller's keyword call.
        impression: unused in this function; kept for the caller's keyword call.
        out_file: path of the text file the sample is written to.

    Returns:
        bool: always True; the sample is written and printed even when the
        consistency check fails.
    """
    messages = [{  # Extract indication
        "role": "system",
        "content": "You are a radiology assistant who help extract information from a chest x-ray radiology report. Output the corresponding section related to indication or medical history, if there are no such sections, output \"none\"."
    }, {
        "role": "user",
        "content": report
    }]
    indication = query_gpt(messages)["choices"][0]["message"]["content"].lower()
    if "none" in indication:
        indication = ""

    messages = [{  # Infer possible medical history and condition
        "role": "system",
        "content": "You are a clinical assistant that helps radiologists retrieve information of patient's other medical records and examination results. You will be given a chest x-ray diagnosis report, and you need to infer the patient's possible medical conditions or history based on the indication and/or impression. Only list 2 to 4 of the possible abnormal medical history or conditions that are related to the indication, findings or impression of the report. Be short and concise and only output the list."
    }, {
        "role": "user",
        "content": report
    }]
    history = query_gpt(messages)["choices"][0]["message"]["content"]

    messages = [{  # Infer possible tests
        "role": "system",
        "content": "You are a clinical assistant that helps radiologists retrieve information of patient's other medical records and examination results. You will be given a chest x-ray diagnosis report, and you need to infer possible medical lab tests that the patient should undergo. Only list 2 to 4 of the possible medical tests that are related to the indication, findings, or impression of the report. Be short and concise and only output the list."
    }, {
        "role": "user",
        "content": report
    }]
    tests = query_gpt(messages)["choices"][0]["message"]["content"]

    messages = [{  # Infer test results
        "role": "system",
        "content": "You are a clinical assistant that helps radiologists retrieve information of patient's other medical records and examination results. You will be given a chest x-ray diagnosis report with names of some medical tests, and you need to infer the reasonable test results and/or values that the patient should have based on the report indication, findings, and impression. Be concise and definitive and do not use uncertain terms. Only output the list without any other statement."
    }, {
        "role": "user",
        "content": f"Report:\n{report}\n\nTests:\n{tests}"
    }]
    test_results = query_gpt(messages)["choices"][0]["message"]["content"]

    messages = [{  # Check consistency and correctness
        "role": "system",
        "content": "You are a clinical assistant that helps radiologists check information consistency of patient's report and medical record. You will be given a chest x-ray diagnosis report, along with some patient's medical history, condition, and lab test results. You need to check whether the medical history/condition and lab test results are diagnostically consistent with the radiology report with no factual errors. If they are not consistent or has any factual errors, output \"no\". If the report and other information are correct and consistent, output \"yes\"."
    }, {
        "role": "user",
        "content": f"{indication}\nMedical History:\n{history}\n\nMedical Tests:\n{test_results}\n\nReport:\n{report}"
    }]
    verdict = query_gpt(messages)["choices"][0]["message"]["content"]
    reason = ""
    # Match "no" as a whole word only: a plain substring test also fires on
    # words such as "diagnosis", "normal" or "not" inside a positive answer.
    if re.search(r"\bno\b", verdict.lower()):
        print(verdict)
        messages.append({
            "role": "assistant",
            "content": "no",
        })
        messages.append({
            "role": "user",
            "content": "Please specify why your answer is no, what is not consistent or incorrect in the given information?"
        })
        reason = query_gpt(messages)["choices"][0]["message"]["content"]

    out_string = f"{indication.strip()}\n\nMedical History:\n{history.strip()}\n\nMedical Tests:\n{test_results.strip()}\n\nGT:\n{report.strip()}\n\nReason:\n{reason}"
    # Context manager closes the file even if the write fails; utf-8 keeps
    # non-ASCII model output writable regardless of the platform default.
    with open(out_file, "w", encoding="utf-8") as f:
        f.write(out_string)
    print(out_string)
    return True

# ---- Main loop: one output file per study ----
# Walks data_dir/files/p*/p*/s*, skips studies that already have an output
# file or belong to another split, and runs the generator for the selected
# task, retrying up to ``max_trials`` times until it reports success.
unique_id = 0  # number of studies successfully processed in this run
for patient_path in tqdm((data_dir/"files").glob("p*/p*")):
    patient_id = patient_path.name
    for study_path in patient_path.glob("s*"):
        study_id = study_path.name
        out_file_name = os.path.join(out_dir, f"{study_id}.txt")
        if os.path.exists(out_file_name):
            continue  # already generated, which makes the script resumable

        report_path = data_dir/"files"/"reports"/patient_id[:3]/patient_id/f"{study_id}.txt"
        # The split table is keyed by the study id without its leading "s".
        study_split = studyid2split[study_id[1:]]
        if split != "all" and study_split != split:
            continue

        report, findings, impression = parse_report(report_path)
        if findings == "" and task == "correction":
            print(f"Fail to parse findings gt from report {report_path}")
            continue  # Skip report without findings section for correction task

        # The template and history tasks work on the raw report text; read it
        # once here instead of on every retry.
        if task in ("template", "history"):
            report = parse_report_raw(report_path)

        status = False
        trials = 0
        # Retry only while the last attempt failed AND the budget is not
        # exhausted, so a study that keeps failing is eventually skipped.
        while not status and trials < max_trials:
            if task == "correction":
                status = get_correction_data(findings, out_file_name)
            elif task == "template":
                status = get_template_data(report, out_file_name)
            elif task == "history":
                status = get_history_data(report=report, findings=findings, impression=impression, out_file=out_file_name)
            trials += 1
        if not status:
            continue
        unique_id += 1
        print(unique_id)


