import csv
from pathlib import Path

from tqdm import tqdm

from mimic_cxr_utils import *


# Build a flat CSV index of the MIMIC-CXR dataset: one row per study whose
# report yields both a non-empty "findings" and a non-empty "impression".
data_dir = "/data/datasets/MIMIC-CXR/"
out_path = "/data/datasets/MIMIC-CXR/processed.csv"

data_dir = Path(data_dir)

# Prefer writing next to the dataset; if that location cannot be opened for
# writing, fall back to "processed.csv" in the current working directory.
# Only OSError is caught so that Ctrl-C and programming errors still surface.
# NOTE: the message is printed when the file is opened, before rows are written.
try:
    out_file = open(out_path, 'w', newline='')
except OSError:
    out_file = open("processed.csv", 'w', newline='')
    print("data saved as processed.csv")
else:
    print(f"data saved as {out_path}")

# The context manager guarantees the file is flushed and closed even if a
# lookup or report parse below raises part-way through the walk.
with out_file:
    csv_writer = csv.writer(out_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)

    # Lookup tables built by the project helpers from the dataset CSVs.
    # dicomid2label is indexed below by image file stem, studyid2split by the
    # study directory name without its leading character.
    dicomid2label = create_id2label_dict(data_dir/"mimic-cxr-2.0.0-metadata.csv")
    studyid2split = create_id2split_dict(data_dir/"mimic-cxr-2.0.0-split.csv")

    # Running row id; only incremented for studies that are actually written.
    unique_id = 0

    # Directory layout walked here: files/p*/p*/s*/<image>.jpg
    for patient_path in tqdm((data_dir/"files").glob("p*/p*")):
        patient_id = patient_path.name
        for study_path in patient_path.glob("s*"):
            study_id = study_path.name

            image_files = list(study_path.glob("*.jpg"))
            # Image paths are stored relative to data_dir.
            image_path_list = [str(image.relative_to(data_dir)) for image in image_files]
            # The file stem (name without ".jpg") is the key into dicomid2label.
            image_label_list = [dicomid2label[image.stem] for image in image_files]

            # All images of a study share one row: paths and labels are
            # comma-joined, in the same order, into one CSV field each.
            image_paths = ','.join(image_path_list)
            image_labels = ','.join(image_label_list)

            # Reports live under files/reports/<first 3 chars of patient id>/<patient id>/<study id>.txt
            report_path = data_dir/"files"/"reports"/patient_id[:3]/patient_id/f"{study_id}.txt"
            # Drop the first character of the study directory name for the split lookup.
            split = studyid2split[study_id[1:]]
            report, findings, impression = parse_report(report_path)

            # Skip studies missing either report section.
            if findings == "" or impression == "":
                continue

            csv_writer.writerow([unique_id, patient_id, study_id, image_paths, image_labels, findings, impression, report, split])
            unique_id += 1

