import json
import re
import regex
# Lookup from the short letter tag of each ground-truth column, e.g. "(B)",
# to the full column name, e.g. "(B): LowerBackPain_found_in_note".
# Entries are generated from (letter, symptom) pairs so the shared
# "(X): <symptom>_found_in_note" shape is spelled out only once.
gt_columns_dict = {
    f"({letter})": f"({letter}): {symptom}_found_in_note"
    for letter, symptom in (
        ("B", "LowerBackPain"),
        ("C", "WeightLoss"),
        ("D", "AppetiteLoss"),
        ("E", "Jaundice"),
        ("F", "Pruritus"),
        ("G", "Indigestion"),
        ("H", "Steatorrhea"),
        ("I", "UrineColorChange"),
        ("J", "Constipation"),
        ("K", "Nausea"),
        ("L", "Vomiting"),
        ("M", "Diarrhea"),
        ("N", "GasorBloating"),
        ("O", "FatigueMalaiseLethargy"),
        ("P", "EarlySatiety"),
        ("Q", "BloodGlucose"),
        ("R", "GI_Bleed"),
        ("S", "Melena"),
        ("T", "BRBPR"),
        ("U", "AbdominalPain"),
        ("V", "UpperMidBackPain"),
    )
}

def gt_to_response(data_dict):
    """
    Render one row of ground-truth values as the intended LLM response.

    Every column name is shortened to the text before its first underscore,
    e.g. "(B): LowerBackPain_found_in_note" -> "(B): LowerBackPain". A name
    with an earlier underscore is cut there as well, so
    "(R): GI_Bleed_found_in_note" -> "(R): GI". If two columns shorten to the
    same name, the later one wins.

    Args:
        data_dict (dict): (gt column, gt value) pairs from one row in the data
    Returns:
        str: the ``str()`` form of the shortened-key dictionary, i.e. a Python
            dict literal with single-quoted string keys
    """
    shortened = {
        column.partition("_")[0]: label
        for column, label in data_dict.items()
    }
    return str(shortened)


def parse_response(response, response_start="My answer is: ", verbose=False):
    """
    Extract the per-column answers from a raw LLM response.

    The text after ``response_start`` is stripped of Markdown code fences,
    its quotes are normalised to double quotes, and the first balanced
    ``{...}`` group is parsed as JSON. Each column in ``gt_columns_dict`` is
    then looked up in the parsed object, first by its shortened name (the
    column name up to its first underscore) and then by its letter tag.

    Args:
        response (str): the raw LLM response in its entirety
        response_start (str): marker that precedes the answer; the marker and
            everything before its first occurrence are discarded. A response
            that does not contain the marker is searched in full.
        verbose (bool): when True, also print the cleaned response text
            whenever no usable JSON could be extracted
    Outputs
        dict: one entry per value of ``gt_columns_dict``, holding the parsed
            answer for that column. Columns absent from the response are 0,
            and every column is 0 when no JSON is found or it fails to parse.
    """
    # Discard everything up to and including the marker. A plain
    # removeprefix() only works when the response *starts* with the marker,
    # so any preamble (or leading whitespace) would otherwise survive and a
    # brace group inside it could be mistaken for the answer.
    marker_at = response.find(response_start)
    if marker_at != -1:
        response = response[marker_at + len(response_start):]
    response = response.strip()

    # Clean-up passes, applied in this order: drop Markdown fences, collapse
    # doubled double-quotes, then turn single quotes (as in a Python dict
    # repr) into double quotes so json.loads accepts the text. The last pass
    # also rewrites apostrophes inside values.
    for old, new in (("```json", ""), ("```", ""), ('""', '"'), ("'", '"')):
        response = response.replace(old, new)

    # Recursive pattern ((?R) needs the third-party `regex` module): matches
    # the first brace group whose braces are balanced. The pattern contains
    # no ".", so no DOTALL flag is needed for it to span newlines.
    json_match = regex.search(r"(\{(?:[^{}]|(?R))*\})", response)

    # Every column defaults to 0; entries are overwritten only on success.
    answers = {col: 0 for col in gt_columns_dict.values()}

    if json_match is None:
        print("No JSON found in the text.")
        if verbose:
            print(response)
        return answers

    try:
        parsed_data = json.loads(json_match.group(0))
    except json.JSONDecodeError as e:
        print("Error parsing JSON:", e)
        if verbose:
            print(response)
        return answers

    for key, column in gt_columns_dict.items():
        short_name = column.split('_')[0]
        if short_name in parsed_data:
            answers[column] = parsed_data[short_name]
        elif key in parsed_data:
            answers[column] = parsed_data[key]
    return answers
