import pandas as pd
import numpy as np


def baseline_model(program_correct_filename="is_generated_program_correct"):
    '''
    Baseline model if the first sampled program is selected with no extra processing.

    Reads from '{program_correct_filename}.csv', using its "Prompt id" and "Correct"
    columns. The "first" program of a prompt is the first row, in file order, that
    carries that prompt id.

    The CSV is expected to contain at least one row; the accuracy is undefined
    (division by zero) otherwise.

    Returns {float accuracy}: sum of "Correct" over the first row of every prompt,
    divided by the number of distinct prompts.
    '''
    # Read from cached csv
    generated_programs = pd.read_csv(f'{program_correct_filename}.csv')

    # Keep the first row of each prompt in a single pass rather than re-filtering
    # the whole frame once per prompt id.
    first_programs = generated_programs.drop_duplicates(
        subset="Prompt id", keep="first")

    return first_programs["Correct"].sum() / len(first_programs)


def pass_at_k(k=5, program_correct_filename="is_generated_program_correct"):
    '''
    pass@k, computed per prompt with the unbiased estimator over all of that
    prompt's sampled programs, then averaged over prompts.

    Reads from '{program_correct_filename}.csv', using its "Prompt id" and "Correct"
    columns.

    The CSV is expected to contain at least one row; the accuracy is undefined
    (division by zero) otherwise.

    Returns {float accuracy}.
    '''

    def unbiased_estimate(n, c, k):
        """
        :param n: total number of samples
        :param c: number of correct samples
        :param k: k in pass@$k$
        """
        # Without any correct sample no draw can pass. This must be checked before
        # the shortcut below, which would otherwise report 1.0 whenever n < k.
        if c == 0:
            return 0.0
        # Fewer than k incorrect samples: every draw of k contains a correct one.
        if n - c < k:
            return 1.0
        return 1.0 - np.prod(1.0 - k / np.arange(n - c + 1, n + 1))

    # Read from cached csv
    generated_programs = pd.read_csv(f'{program_correct_filename}.csv')

    # One row per prompt: number of samples ("size") and number of correct ones ("sum")
    per_prompt = generated_programs.groupby("Prompt id")["Correct"].agg(["size", "sum"])

    total = sum(unbiased_estimate(num_samples, num_correct_samples, k)
                for num_samples, num_correct_samples
                in zip(per_prompt["size"], per_prompt["sum"]))
    return total / len(per_prompt)


def naive_matching(num_test_cases_to_solve=3,  gen_program_gen_test_case_filename="gen_program_gen_testcase"):
    '''
    First model. Naively selects the program that solves the most of the test cases that are solved by the most programs.

    For each prompt, the {num_test_cases_to_solve} most solved test cases are regarded as the
    "ground truth test cases". Programs are then ordered by how many of those test cases they
    solve; ties are broken by the "rank" sum, where the rank of a test case is the number of
    programs that solve it and only the test cases the program solves are summed. The program
    at the top of that ordering is selected.

    Ties between test cases at the selection cut-off are resolved with a stable sort, so the
    selection is reproducible across runs and platforms.

    Reads from '{gen_program_gen_test_case_filename}.csv', using the columns "Prompt id",
    "Generated Program ID", "Is generated program correct", "Generated Test Case ID" and
    "Result". "Result" is treated as a pass/fail flag and may be stored as booleans or as 0/1.

    The CSV is expected to contain at least one prompt; the accuracy is undefined
    (division by zero) otherwise.

    Returns {float accuracy}
    '''

    # Reads from cached csv
    gen_everything = pd.read_csv(f'{gen_program_gen_test_case_filename}.csv')

    program_key = ["Generated Program ID", "Is generated program correct"]

    num_prompts = 0
    num_correct = 0
    for _, gen_from_prompt_id in gen_everything.groupby("Prompt id"):
        num_prompts += 1

        test_case_ids = gen_from_prompt_id["Generated Test Case ID"]
        # Normalise the flag to a real boolean mask so 0/1-coded results work too
        solved = gen_from_prompt_id["Result"].eq(True)

        # Number of programs solving each test case, most solved first
        most_solved_test_cases = (
            solved.groupby(test_case_ids).sum()
            .sort_values(ascending=False, kind="stable")
            .iloc[:num_test_cases_to_solve]
        )

        in_ground_truth = test_case_ids.isin(most_solved_test_cases.index)
        solved_ground_truth = solved[in_ground_truth]

        # Rank of a row = how widespread its test case is. It only counts towards a
        # program when that program actually solves the test case. This is the
        # tiebreaker for programs that solve the same number of test cases but
        # different ones.
        rank_of_test_case = test_case_ids[in_ground_truth].map(most_solved_test_cases)

        scored = gen_from_prompt_id[in_ground_truth].assign(
            Result=solved_ground_truth.astype(int),
            Rank=rank_of_test_case.where(solved_ground_truth, 0),
        )

        programs_solving_tests = (
            scored.groupby(by=program_key)[["Result", "Rank"]].sum()
            .sort_values(["Result", "Rank"], ascending=[False, False])
        )

        # The index is (program id, is-correct flag); only the flag is needed here
        _, is_program_correct = programs_solving_tests.index[0]
        num_correct += is_program_correct

    return num_correct / num_prompts
