from experiment import HardLevelExperiment
from eval import eval_file
from utils.utils import extract_hard_level_file


def extract_hard_level(model_name, dataset_list, method_list, level='Level 5'):
    """
    Run the hard-level extraction over every (method, dataset) result file.

    For each combination, the file ``results/{model_name}_{dataset}_{method}.jsonl``
    is passed to ``extract_hard_level_file`` together with ``level``.

    Args:
        model_name: Model identifier used as the file-name prefix.
        dataset_list: Dataset names to process.
        method_list: Method names to process (outer loop).
        level: Difficulty label forwarded to ``extract_hard_level_file``;
            defaults to ``'Level 5'``.

    NOTE(review): where the extracted output is written is decided by
    ``extract_hard_level_file`` and is not visible from here.
    """
    for method in method_list:
        for dataset in dataset_list:
            path = f'results/{model_name}_{dataset}_{method}.jsonl'
            extract_hard_level_file(path, level)
            # Report the level that was actually requested, not a fixed "level 5".
            print(f"Extracted {level} problems from {path}")


if __name__ == '__main__':
    # Script entry point: for each method, run the hard-level experiment on
    # every dataset, evaluate the stored results, and print the accuracy
    # averaged over all collected scores.

    dataset_list = ['intermediate algebra', 'prealgebra']
    hard_level = "Level 5"
    model_name = 'gpt-4o'
    # model_name = 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo'
    verbose = False
    zero_shot = False  # NOTE(review): not read anywhere in this block.

    # GPT runs are stored under their own sub-directory. The model name is
    # fixed for the whole run, so the directory is chosen once up front.
    results_dir = "results/gpt" if 'gpt' in model_name else "results"

    # for method in ['MajoratityVoting']:
    for method in ['metamath']:
        # Scores pooled across all datasets for this method.
        scores = []
        for dataset in dataset_list:
            print(f"Running experiment on {dataset} with hard level {hard_level} for method {method}")
            store_path = f"{results_dir}/{model_name}_{dataset}_{hard_level}_{method}.jsonl"
            exp = HardLevelExperiment(model_name, dataset, method, store_path, hard_level=hard_level, verbose=verbose)
            exp.run()
            # presumably eval_file returns an iterable of per-problem numeric
            # scores; verify against eval.eval_file.
            scores += eval_file(store_path)

        # Avoid ZeroDivisionError when nothing was evaluated for this method.
        if not scores:
            print(f"No scores collected for model {model_name} of method {method}; skipping average accuracy")
            continue

        accuracy = sum(scores) / len(scores)
        print(f"Average accuracy for model {model_name} of method {method} is {accuracy}")