from dataset_processing_functions import *
import json


def process_mbpp(modified_mbpp=False):
    '''
    Processes the Mostly Basic Python Problems dataset from the jsonl file and returns a list of dicts.

    Reads "modified_mbpp.jsonl" when modified_mbpp is truthy, otherwise "mbpp.jsonl". Each non-blank
    line is parsed as one JSON object (one problem); blank lines are skipped.

    Dict format (keys read from the dataset) -
    text - gives prompt
    code - gives correct code
    test_list - gives code to test program as a list

    Dict format (keys added by this function) -
    function_name - function name taken from the first test's assert, for integration with OpenAI's API
    signature - signature (which includes parameters) from ground truth program
    parameters - result of get_parameters applied to the signature
    parameter_typing - get_parameter_typing applied to each parameter found in the first test
    return_type - get_parameter_typing applied to the text after the first "==" in the first test

    Modified MBPP throws out only one problem from the MBPP set, which contains a unique assert formatting case that
    is awkward to handle.

    It throws out #912, with prompt "Write a function to find ln, m lobb number."

    Raises IndexError if a problem has an empty test_list or if its first test contains no "==".
    '''

    def get_signature(function_name, ground_truth_code):
        '''
        Get the ground-truth function signature (def ___(a,b,c)) from the ground-truth code and
        corresponding function_name.

        The signature is the text from "def <function_name>" up to (not including) the first ":"
        that follows it, so a ":" inside the parameter list would cut the signature short.
        '''

        function_name_pos = ground_truth_code.find(f"def {function_name}")
        function_name_end_pos = ground_truth_code.find(":", function_name_pos)

        return ground_truth_code[function_name_pos:function_name_end_pos]

    dataset_path = "modified_mbpp.jsonl" if modified_mbpp else "mbpp.jsonl"

    problems_list = []

    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(dataset_path, "r", encoding="utf-8") as mbpp_dataset:
        for one_problem in mbpp_dataset:
            # A blank (e.g. trailing) line is not a JSON document; skip it
            # instead of letting json.loads fail.
            if not one_problem.strip():
                continue

            problem_dict = json.loads(one_problem)

            # Change format for OpenAI's API
            first_test = problem_dict["test_list"][0]

            # The function name is the text between "assert " and the first "(".
            function_name = first_test[len(
                "assert "):first_test.find("(")].strip()

            # Very specific processing - some of MBPP's problems are uniquely formatted, so must account for this.
            # Hardcoded, so this may not be the best solution.
            # When the call is wrapped as "assert int(<call>...", the name is
            # taken from the text following "int(" instead.
            if first_test.startswith("int(", len("assert ")):
                rest_of_assert = first_test[len("assert int("):]

                function_name = rest_of_assert[:rest_of_assert.find(
                    '(')].strip()

            parameters = get_parameters(first_test)

            parameter_typing = [get_parameter_typing(x) for x in parameters]

            # Detailed typing - for processing things like List[Int]
            # detailed_param_typing = [
            #     get_parameter_detailed_typing(x) for x in parameters]
            # problem_dict["detailed_typing"] = detailed_param_typing

            # The expected value is whatever follows the first "==" in the assert.
            return_type = get_parameter_typing(first_test.split("==")[1])

            problem_dict["return_type"] = return_type

            problem_dict["parameter_typing"] = parameter_typing

            problem_dict["function_name"] = function_name

            problem_dict["signature"] = get_signature(
                function_name, problem_dict["code"]).strip()

            # Note: these parameters come from the ground-truth signature,
            # not from the call in the first test used above.
            problem_dict["parameters"] = get_parameters(
                problem_dict["signature"])

            problems_list.append(problem_dict)

    return problems_list


def mbpp_jsonl_to_csv():
    '''
    Converts the processed MBPP problems into a CSV file named "mbpp.csv".

    Each problem from process_mbpp() gets a "Prompt_id" equal to its position in the list.
    The "test_list", "parameter_typing", "parameters" and "detailed_typing" values are
    replaced by their JSON string form before the rows are written; a key that is absent
    from a problem is left out rather than raising KeyError.

    Returns nothing; the result is the file written to the current working directory.
    '''
    # NOTE(review): this file has no explicit pandas import and appears to rely
    # on the star import for `pd`; importing here keeps the function
    # self-contained either way.
    import pandas as pd

    problems_list = process_mbpp()

    json_columns = ("test_list", "parameter_typing",
                    "parameters", "detailed_typing")

    for prompt_id, problem in enumerate(problems_list):
        problem["Prompt_id"] = prompt_id

        for column in json_columns:
            # process_mbpp does not currently set "detailed_typing" (its
            # computation is commented out), so only serialize keys that exist.
            if column in problem:
                problem[column] = json.dumps(problem[column])

    mbpp_df = pd.DataFrame(problems_list)
    mbpp_df.to_csv("mbpp.csv")
