import os
import json
import argparse
from get_tasks_pipeline import main as get_tasks_pipeline
from tqdm import tqdm

def load_jsonl(file_path):
    """Load a JSON Lines file into a list of decoded objects.

    Each non-blank line of the file is parsed as an independent JSON
    document. Blank (whitespace-only) lines are skipped so that a trailing
    newline or an empty separator line does not raise a decode error.

    Args:
        file_path: Path to the ``.jsonl`` file to read.

    Returns:
        A list with one decoded JSON value per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # The context manager guarantees the handle is closed even when a line
    # fails to parse; UTF-8 is pinned so decoding is not locale-dependent.
    with open(file_path, 'r', encoding='utf-8') as f:
        return [json.loads(line) for line in f if line.strip()]

def load_json(file_path):
    """Load a single JSON document from a file.

    Args:
        file_path: Path to the JSON file to read.

    Returns:
        The decoded JSON value (whatever top-level type the file contains).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    # The ``with`` block closes the file on exit, so no explicit close() is
    # needed. UTF-8 is pinned so decoding is not locale-dependent.
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)

def main(args):
    """Run the task pipeline over a slice of the configured repo list.

    Reads the repo list from ``args.repo_list_path``, makes sure the output
    directories exist, and calls ``get_tasks_pipeline`` once per repo in the
    selected range.

    Args:
        args: Namespace-like object providing ``repo_list_path``,
            ``prs_path``, ``tasks_path``, ``start_idx`` and
            ``num_repos_to_process``. A ``num_repos_to_process`` of ``-1``
            means "process every repo from ``start_idx`` to the end".
            ``args`` is not modified.
    """
    # NOTE(review): the list is parsed with load_json (a single JSON
    # document) while the CLI default path ends in ".jsonl" — confirm the
    # expected on-disk format of the repo list.
    data = load_json(args.repo_list_path)
    print(f"Total number of repos: {len(data)}")

    print(f"Starting pulling PRs from repo #{args.start_idx+1}")

    # makedirs(exist_ok=True) creates missing parent directories too and
    # avoids the check-then-create race of exists() followed by mkdir().
    os.makedirs(args.prs_path, exist_ok=True)
    os.makedirs(args.tasks_path, exist_ok=True)

    # An open-ended slice (stop=None) expresses "to the end" directly, which
    # stays correct for a negative start_idx and leaves args untouched.
    if args.num_repos_to_process == -1:
        stop_idx = None
    else:
        stop_idx = args.start_idx + args.num_repos_to_process

    for git_repo in tqdm(data[args.start_idx:stop_idx]):
        # The pipeline is invoked with one repo at a time.
        get_tasks_pipeline(
            repos=[git_repo],
            path_prs=args.prs_path,
            path_tasks=args.tasks_path,
        )

if __name__ == '__main__':
    # Command-line entry point: collect the options and hand them to main().
    cli = argparse.ArgumentParser()

    # Input: file holding the list of repos to process.
    cli.add_argument(
        '--repo_list_path',
        type=str,
        default="valid_top_pypi_gitrepos.jsonl",
        help="Path to the file of a list of valid GitHub repos",
    )

    # Output locations for pulled PRs and generated tasks.
    cli.add_argument(
        '--prs_path',
        type=str,
        default="prs",
        help="Path to save Git PRs",
    )
    cli.add_argument(
        '--tasks_path',
        type=str,
        default="tasks",
        help="Path to save tasks",
    )

    # Range selection within the repo list.
    cli.add_argument(
        '--start_idx',
        type=int,
        default=0,
        help="The index to start generating tasks from",
    )
    cli.add_argument(
        '--num_repos_to_process',
        type=int,
        default=-1,
        help="The number of repos to process, starting from start_idx. defaults to -1 to fully process",
    )

    main(cli.parse_args())
    