import argparse
import os
import json
from localize.util.load_data_to_swe import load_crawled_data_to_swe

if __name__ == '__main__':
    # Command-line entry point: load the crawled data referenced by
    # --raw_file via load_crawled_data_to_swe and write each returned item
    # as one JSON line into <output_folder>/<output_file>.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--raw_file",
        type=str,
        default=None,
        help="Repo-level data file",
    )
    parser.add_argument("--output_folder", type=str, required=True)
    parser.add_argument("--output_file", type=str, default="all_data.jsonl")

    args = parser.parse_args()
    # From here on, output_file holds the full path inside output_folder;
    # the joined path is also what gets recorded in the args dump below.
    args.output_file = os.path.join(args.output_folder, args.output_file)

    os.makedirs(args.output_folder, exist_ok=True)

    # Record the arguments used for this run next to the output data.
    args_path = os.path.join(args.output_folder, "init-filter-args.json")
    with open(args_path, "w", encoding="utf-8") as f:
        json.dump(vars(args), f, indent=4)

    # NOTE(review): raw_file is None when --raw_file is omitted; whether the
    # loader accepts None is not visible from this script -- confirm.
    all_data = load_crawled_data_to_swe(args.raw_file)

    # ensure_ascii=False emits non-ASCII characters verbatim, so the file is
    # opened explicitly as UTF-8 instead of relying on the platform's default
    # encoding (which may be unable to encode them).
    with open(args.output_file, "w", encoding="utf-8") as f:
        for item in all_data:
            f.write(json.dumps(item, ensure_ascii=False) + '\n')
    
