from openai import OpenAI
import argparse
import json
from tqdm import tqdm
import os


def main():
    """Run OpenAI moderation over each response file and save flagged categories.

    Command-line arguments:
        --response_fold: directory holding the input JSON Lines files. Every
            record must provide ``prompt`` and ``response`` keys.
        --result_fold: directory (created if missing) that receives one
            ``<name>_openai_evaluation.jsonl`` file per input file.
        --sample_num: integer used only to estimate the progress-bar total.

    Each output record is the input record plus a ``harmful_type`` key whose
    value is the names of the flagged moderation categories joined by ``;``
    (an empty string when nothing is flagged).

    Raises:
        KeyError: if a record lacks ``prompt`` or ``response``.
        json.JSONDecodeError: if a non-blank input line is not valid JSON.
    """
    suffix = ".jsonl"

    parser = argparse.ArgumentParser()
    parser.add_argument("--response_fold", type=str, required=True)
    parser.add_argument("--result_fold", type=str, required=True)
    # Must be an int: it is multiplied into the progress-bar total below, and
    # a str here would turn that product into string repetition.
    parser.add_argument("--sample_num", type=int, default=5)
    args = parser.parse_args()

    client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

    # Sorted for a reproducible processing order; sub-directories are skipped
    # because they cannot be opened as response files.
    response_files = [
        name
        for name in sorted(os.listdir(args.response_fold))
        if os.path.isfile(os.path.join(args.response_fold, name))
    ]
    os.makedirs(args.result_fold, exist_ok=True)

    # The total is only an estimate (files * sample_num * 11); the bar is
    # advanced once per evaluated record.
    with tqdm(total=len(response_files) * args.sample_num * 11) as progress:
        for response_name in response_files:
            # Strip the ".jsonl" suffix only when it is really there, so other
            # file names are not truncated by six arbitrary characters.
            if response_name.endswith(suffix):
                stem = response_name[:-len(suffix)]
            else:
                stem = os.path.splitext(response_name)[0]

            response_file = os.path.join(args.response_fold, response_name)
            result_file = os.path.join(
                args.result_fold, stem + "_openai_evaluation.jsonl"
            )
            print(f"Evaluating {response_file} and writing to {result_file}")

            # Context managers guarantee both files are flushed and closed,
            # even if a moderation request raises part-way through.
            with open(response_file, "r", encoding="utf-8") as f_in, \
                    open(result_file, "w", encoding="utf-8") as f_out:
                for line in f_in:
                    if not line.strip():
                        # Tolerate blank lines (e.g. a trailing newline).
                        continue
                    data = json.loads(line)
                    categories = client.moderations.create(
                        input=f"{data['prompt']} {data['response']}"
                    ).results[0].categories
                    flags = json.loads(categories.json())
                    harmful_type = ";".join(k for k, v in flags.items() if v)
                    evaluation_result = {**data, "harmful_type": harmful_type}
                    f_out.write(json.dumps(evaluation_result) + "\n")
                    progress.update(1)


# Run the evaluation only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
