from experiment import HardLevelExperiment
from hard_level import Math_level

def build_store_path(reasoning_model, dataset, hard_level, method, decision_model, is_train):
    """Return the JSONL results path for one experiment configuration.

    The layout is
    ``<root>/<reasoning_model>_<dataset>_<hard_level>_<method>_<decision_model>_<split>.jsonl``
    where ``<root>`` is ``results/gpt`` when the reasoning model name contains
    ``'gpt'`` and ``results`` otherwise, and ``<split>`` is ``train`` when
    *is_train* is true and ``new`` otherwise.

    The split tag is chosen directly instead of rewriting ``'new'`` across the
    finished string, so a model or dataset name that happens to contain
    ``"new"`` is left intact.
    """
    root = "results/gpt" if 'gpt' in reasoning_model else "results"
    split_tag = 'train' if is_train else 'new'
    return f"{root}/{reasoning_model}_{dataset}_{hard_level}_{method}_{decision_model}_{split_tag}.jsonl"


if __name__ == '__main__':

    ############### Run INCMath ###############
    is_train = False  # if True, run the training set
    method = 'incmath'

    dataset_list = [
        'algebra',
        'counting & probability',
        'geometry',
        'number theory',
        'intermediate algebra',
        'precalculus',
        'prealgebra',
    ]
    decision_model = 'gpt-4o'

    # NOTE(review): this id contains '/', so the results path points into a
    # nested directory (results/meta-llama/...). This script does not create
    # it -- confirm HardLevelExperiment does, or create it beforehand.
    reasoning_model = "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo"

    hard_levels = ['Level 5']

    for hard_level in hard_levels:
        # Disabled call kept from the original script:
        # Math_level(decision_model, verbose=False, zero_shot=False, method_list=['incmath'], is_train=is_train, hard_level=hard_level)
        for dataset in dataset_list:
            # One results file per (dataset, difficulty level) combination.
            store_path = build_store_path(
                reasoning_model, dataset, hard_level, method, decision_model, is_train
            )
            exp = HardLevelExperiment(
                decision_model,
                dataset,
                method,
                store_path,
                hard_level=hard_level,
                verbose=False,
                zero_shot=False,
                reasoning_model=reasoning_model,
                is_train=is_train,
            )
            exp.run()