import json
import re
import regex
# Maps each option tag "(A)".."(W)" to its ground-truth column name, which has
# the form "(<letter>): <symptom>_found_in_note".  The symptom names below are
# listed in option order, so the i-th name is paired with the i-th letter.
gt_columns_dict = {
    f"({letter})": f"({letter}): {symptom}_found_in_note"
    for letter, symptom in zip(
        "ABCDEFGHIJKLMNOPQRSTUVW",
        (
            "Diarrhea",
            "Constipation",
            "Nausea",
            "Vomiting",
            "Abdominal Pain",
            "Abdominal Distension",
            "Fatigue",
            "Allergic reaction",
            "Weight loss",
            "Erythema",
            "Hair loss",
            "Neutropenia",
            "Anemia",
            "Abnormal liver function",
            "Dyspnea",
            "Appetite Loss",
            "Fever",
            "Chills",
            "Jaundice",
            "Thrombocytopenia",
            "Sensory Neuropathy",
            "Motor Neuropathy",
            "Cold-induced Neuropathy",
        ),
    )
}


def gt_to_response(data_dict):
    """Render one row of ground-truth labels as the text the LLM should produce.

    Args:
        data_dict (dict): (gt column, gt value) pairs from one row in the data
    Returns:
        str: ``str()`` of a dict whose keys are the column names cut off at
            their first underscore and whose values are taken over unchanged.
            If two columns share the same truncated name, the later one wins.
    """
    trimmed = {
        column.partition("_")[0]: label
        for column, label in data_dict.items()
    }
    return str(trimmed)


def _normalize_llm_json(text):
    """Strip Markdown code fences and coerce quoting so *text* is closer to valid JSON."""
    text = text.strip()
    text = text.replace("```json", '')
    text = text.replace("```", '')
    # Collapse doubled double-quotes (presumably left over from CSV-style
    # escaping of the response text).
    text = text.replace('""', '"')
    # JSON requires double quotes.  NOTE: this also rewrites apostrophes that
    # appear inside keys or values.
    return text.replace("'", '"')


def parse_response(response, response_start="My answer is:", verbose=False):
    """Extract the per-symptom answers from a raw LLM response.

    The text following the first occurrence of ``response_start`` is searched
    for the first brace-balanced ``{...}`` block, which is then parsed as JSON.
    If the marker is absent, or nothing brace-balanced follows it, the whole
    response is searched instead.

    Args:
        response (str): the raw LLM response in its entirety
        response_start (str): the marker that precedes the answer; it and
            everything before it are ignored when it is present
        verbose (bool): when True, also print the cleaned response text
            whenever no JSON is found or the JSON cannot be parsed
    Returns:
        dict: one entry per value of ``gt_columns_dict`` (the gt column
            names).  Each value is taken from the parsed JSON, looked up first
            by the column name without its ``_found_in_note`` suffix
            (e.g. ``"(A): Diarrhea"``) and then by the bare option tag
            (e.g. ``"(A)"``).  Columns that are missing, and all columns when
            parsing fails, are 0.
    """
    answers = {col: 0 for col in gt_columns_dict.values()}

    # Prefer the text after the marker.  str.removeprefix() only strips an
    # exact leading match, so a response with leading whitespace or a preamble
    # would otherwise keep everything before the marker in play.
    candidates = []
    if response_start:
        _, marker, tail = response.partition(response_start)
        if marker:
            candidates.append(tail)
    # Fallback: the full response, so inputs without the marker still parse.
    candidates.append(response)

    cleaned = ""
    json_match = None
    for candidate in candidates:
        cleaned = _normalize_llm_json(candidate)
        # Recursive pattern: a '{', then any mix of non-brace characters and
        # nested balanced groups, then the matching '}'.
        json_match = regex.search(r"(\{(?:[^{}]|(?R))*\})", cleaned)
        if json_match:
            break

    if not json_match:
        print("No JSON found in the text.")
        if verbose:
            print(cleaned)
        return answers

    try:
        parsed_data = json.loads(json_match.group(0))
    except json.JSONDecodeError as e:
        print("Error parsing JSON:", e)
        if verbose:
            print(cleaned)
        return answers

    for key, column in gt_columns_dict.items():
        label = column.split('_')[0]
        if label in parsed_data:
            answers[column] = parsed_data[label]
        elif key in parsed_data:
            answers[column] = parsed_data[key]
        # Otherwise the column keeps its default of 0.
    return answers

if __name__ == '__main__':
    # Manual smoke test: parse a sample response and print the resulting
    # answers dict.
    sample_response = """
    My answer is:
    {"(A): Diarrhea": 0, "(B): Constipation": 0, "(C): Nausea": 0, "(D): Vomiting": 0, "(E): Abdominal Pain": 0, "(F): Abdominal Distension": 0, "(G): Fatigue": 0, "(H): Allergic reaction": 0, "(I): Weight loss": 0, "(J): Erythema": 0, "(K): Hair loss": 0, "(L): Neutropenia": 0, "(M): Anemia": 0, "(N): Abnormal liver function": 0, "(O): Dyspnea": 0, "(P): Appetite Loss": 0, "(Q): Fever": 0, "(R): Chills": 0, "(S): Jaundice": 0, "(T): Thrombocytopenia": 1, "(U): Sensory Neuropathy": 1, "(V): Motor Neuropathy": 0, "(W): Cold-induced Neuropathy": 1}
    """
    # Alternative sample (disabled): a Markdown-fenced response whose quotes
    # are doubled.  Uncomment to exercise the fence/quote clean-up path.
    # sample_response = """"
    #     ```json
    #     {
    #         ""(A): Diarrhea"": 0,
    #         ""(B): Constipation"": 0,
    #         ""(C): Nausea"": 0,
    #         ""(D): Vomiting"": 0,
    #         ""(E): Abdominal Pain"": 0,
    #         ""(F): Abdominal Distension"": 0,
    #         ""(G): Fatigue"": 0,
    #         ""(H): Allergic reaction"": 0,
    #         ""(I): Weight loss"": 0,
    #         ""(J): Erythema"": 0,
    #         ""(K): Hair loss"": 0,
    #         ""(L): Neutropenia"": 0,
    #         ""(M): Anemia"": 0,
    #         ""(N): Abnormal liver function"": 0,
    #         ""(O): Dyspnea"": 0,
    #         ""(P): Appetite Loss"": 0,
    #         ""(Q): Fever"": 0,
    #         ""(R): Chills"": 0,
    #         ""(S): Jaundice"": 0,
    #         ""(T): Thrombocytopenia"": 0,
    #         ""(U): Sensory Neuropathy"": 0,
    #         ""(V): Motor Neuropathy"": 1,
    #         ""(W): Cold-induced Neuropathy"": 0
    #     }
    #     ```
    # """
    parsed_answers = parse_response(
        sample_response,
        response_start='My answer is:',
        verbose=True,
    )
    print(parsed_answers)