import json
from transfer_openai import send_request
from tqdm import tqdm

def _read_prompt(path):
    """Return the full text of the prompt template stored at *path*."""
    # Use a context manager so the file handle is closed promptly instead of
    # being left for the garbage collector.
    with open(path, 'r') as prompt_file:
        return prompt_file.read()


# Template filled in by refer() for each of the three peer-review requests.
reviewer_prompt = _read_prompt('../prompts/refer.txt')
# Template filled in by refer() for the final decision; refer() additionally
# substitutes the [Peer_response1..3] placeholders into it.
ac_prompt = _read_prompt('../prompts/refer_ac.txt')
# refer() queries models[0..2] for the peer reviews and models[3] for the
# final decision.
models = ['gpt-3.5-turbo', 'gpt-4o', 'gpt-3.5-turbo-1106', 'gpt-4o']

def _fill_template(template, values):
    """Replace every placeholder key of *values* that occurs in *template*.

    All placeholders are substituted in a single pass, so text inserted for
    one placeholder (e.g. an answer that literally contains "[reference]")
    is never re-scanned and substituted again.
    """
    import re

    pattern = re.compile('|'.join(re.escape(key) for key in values))
    return pattern.sub(lambda match: values[match.group(0)], template)


def _parse_rating(decision):
    """Return the numeric rating that follows the last "Rating:" marker.

    The evaluation form asks the model to start with "Analysis:" and then
    give the numeric rating on the next line after "Rating:".  Markdown
    asterisks are dropped and the first number after the marker is used, so
    trailing text such as "4/5" or a closing remark does not break parsing.

    Raises:
        ValueError: if no number can be found after the marker.
    """
    import re

    tail = decision.split('Rating:')[-1].replace('*', '')
    match = re.search(r'[-+]?(?:\d+\.?\d*|\.\d+)', tail)
    if match is None:
        raise ValueError(
            'no numeric rating found in response: {!r}'.format(decision)
        )
    return float(match.group(0))


def _rate_answer(context, question, answer, reference):
    """Collect three peer reviews of *answer* and return the final rating."""
    fields = {
        '[context]': context,
        '[question]': question,
        '[answer]': answer,
        '[reference]': reference,
    }
    peer_prompt = _fill_template(reviewer_prompt, fields)
    peer_reviews = [
        send_request(peer_prompt, model, temperature=0.7)
        for model in models[:3]
    ]

    # The final prompt must describe the SAME answer the peers reviewed;
    # otherwise the peer responses refer to a different answer.
    final_fields = dict(fields)
    for index, review in enumerate(peer_reviews, start=1):
        final_fields['[Peer_response%d]' % index] = review
    final_prompt = _fill_template(ac_prompt, final_fields)
    decision = send_request(final_prompt, models[3], temperature=0.7)
    return _parse_rating(decision)


def refer(input_path, output_path):
    """Label each benchmark item with a preference between two answers.

    For every item, answer a and answer b are each rated by three peer
    models followed by a final model; the higher rating wins and the result
    ('a', 'b' or 'tie') is stored under the 'ReFeR_preference' key.

    The run is resumable: items already present in *output_path* are
    skipped (by count), and the output file is rewritten after every item.

    Args:
        input_path: JSON file holding a list of items with the keys
            'reference', 'question', 'context', 'student_answer_a' and
            'student_answer_b'.
        output_path: JSON file holding the list of items processed so far.
            It is created on the first run if it does not exist yet.

    Raises:
        ValueError: if a final response contains no numeric rating.
    """
    with open(input_path, 'r') as f:
        dataset = json.load(f)

    # A missing checkpoint simply means nothing has been processed yet.
    try:
        with open(output_path, 'r') as f:
            info = json.load(f)
    except FileNotFoundError:
        info = []

    for data in tqdm(dataset[len(info):]):
        reference = data['reference']
        question = data['question']
        context = data['context']
        answer_a = data['student_answer_a']
        answer_b = data['student_answer_b']

        rating_a = _rate_answer(context, question, answer_a, reference)
        rating_b = _rate_answer(context, question, answer_b, reference)

        if rating_a > rating_b:
            final_decision = 'a'
        elif rating_a < rating_b:
            final_decision = 'b'
        else:
            final_decision = 'tie'

        data['ReFeR_preference'] = final_decision
        info.append(data)
        # Persist after every item so an interrupted run can resume.
        with open(output_path, 'w') as f:
            json.dump(info, f, indent=4)
    
# Script entry: label the 'pol' benchmark file, writing results to (and
# resuming from) the matching *_refer.json file.
refer(
    input_path='../benchmark/final_version/pol.json',
    output_path='../benchmark/final_version/pol_refer.json',
)
        