
"""
Generate all labels and results
Uncommitted results will be overwritten
"""

import util_labels
import util_results
import util_repo

import os
import argparse
import shutil
import pandas as pd
from datasets import load_dataset


def run_all(limit=None):
    """Run every experiment in each identifier mode, then compile the result tables.

    Args:
        limit: Optional cap passed through unchanged to each ``run_single``
            call (defaults to ``None``).
    """
    experiment_count = 3
    identifier_modes = ('semantic', 'tokens')

    # Locations handed to the table compiler once all experiments are done.
    tables_dir = "results"
    reference_path = "inputs/reference_results.jsonl"

    # Fetch the SWE-bench test split and convert it to a DataFrame.
    print("\nDownloading the SWE-bench dataset ...")
    dataset = load_dataset('princeton-nlp/SWE-bench', split='test')
    df_benchmark = dataset.to_pandas()

    # Experiments are numbered from 1; each one runs in 'semantic' mode
    # first and 'tokens' mode second.
    for expno in range(1, experiment_count + 1):
        for mode in identifier_modes:
            run_single(df_benchmark, expno, mode, limit)

    # Build the final tables from the per-experiment outputs.
    util_results.compile_tables(tables_dir, reference_path)

    print("\nALL EXPERIMENTS COMPLETE\n")


def run_single(df_benchmark, expno, identifier_mode='semantic', limit=None):
    """Run a single experimental configuration.

    Any existing output folder for this (experiment, mode) pair is deleted
    and recreated, so results from a previous run are lost.

    Args:
        df_benchmark: Benchmark DataFrame; its "repo" column supplies the
            repository names to clone.
        expno: Experiment number, used to build the input file names and the
            output folder name.
        identifier_mode: Mode string passed to ``util_labels.generate_labels``
            and appended to the output folder name.
        limit: Optional limit passed through to
            ``util_labels.generate_labels`` (defaults to ``None``).
    """

    repo_folder = "repos"  # for local clones

    print(f"\nEXECUTING EXPERIMENT {expno} (IDENTIFIER MODE: {identifier_mode})\n")

    input_sample = f"inputs/samples/experiment{expno}_sample.txt"
    input_annotations = f"inputs/annotations/experiment{expno}_annotations.csv"
    experiment_folder = f"results/experiment{expno}_{identifier_mode}"

    # Clear previous results for this experiment.
    if os.path.isdir(experiment_folder):
        shutil.rmtree(experiment_folder)
    # makedirs (not mkdir) so the parent "results" folder is created too when
    # it does not exist yet, e.g. on a fresh checkout.
    os.makedirs(experiment_folder, exist_ok=True)

    print("Reading input files\n")
    df_sample = pd.read_csv(input_sample)            # sample instances
    df_annotations = pd.read_csv(input_annotations)  # human (swe-v) annotations

    # Every distinct repository in the benchmark is passed to the clone
    # helper, not only those referenced by this experiment's sample.
    print("Cloning repositories\n")
    for repo_name in df_benchmark["repo"].unique():
        util_repo.clone_from_github(repo_folder, repo_name)
    print()

    # Generate labels and output to json.
    df_labels = util_labels.generate_labels(df_benchmark, df_sample, identifier_mode, limit)
    util_labels.save_labels_to_json(df_labels, experiment_folder)

    # Generate results and confusion matrix.
    util_results.generate_results(df_labels, df_annotations, experiment_folder)

    print("EXPERIMENT COMPLETE\n")


# main script

if __name__ == "__main__":

    # Command-line interface: a single optional integer flag, --limit.
    cli = argparse.ArgumentParser(description='Run all experimental configurations')
    cli.add_argument(
        '--limit',
        type=int,
        default=None,
        help='(For testing purposes) Enforce a limit on the number of sample instances used in each experiment.',
    )
    options = cli.parse_args()

    # Start the full experiment sequence with the parsed limit (None if omitted).
    run_all(options.limit)
