import time
import os
import random
import json
import sys
from multiprocessing import Pool, cpu_count

from utils.load_json import load_json
from utils.save_json import save_json
from utils.call_gpt import gen_chatgpt_outputs
from utils.sample_data import sampling_data

# Outer prompt sent to the judge model for the resolution (stance-coverage) score.
# It is a str.format template with a single field, {wait_for_judge_statement};
# every doubled brace ({{ / }}) is a format escape that renders as one literal
# brace, so the model sees JSON-looking snippets such as {"choices": [...]}.
# The model is asked to answer with a JSON object holding "explanation" and "score".
EVAL_PROMPT_TEMPLATE = """Your task is to give a score for the following statement based on the score guidelines and your knowledge of the party.
{{"{wait_for_judge_statement}"}}.
{{"choices": ["not at all", "very poorly", "poorly", "somewhat poorly", "somewhat", "moderately", "somewhat well", "well", "very well", "perfectly"], "choice_numbers": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]}}.
Your score should in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], and explain why you give this score in one sentence. The reason should be specific to the party's stance.
Please respond in the following JSON format:
{{"explanation": "<one sentence explanation>","score": <choice_number>}}
"""

# Outer prompt sent to the judge model for the feasibility score.
# Same layout as EVAL_PROMPT_TEMPLATE, but its single str.format field is
# {feasibility_judge_statement} and the explanation is additionally asked to
# refer to the background information. Doubled braces render as literal braces.
FEASIBILITY_EVAL_PROMPT_TEMPLATE = """Your task is to give a score for the following statement based on the score guidelines and your knowledge of the party.
{{"{feasibility_judge_statement}"}}.
{{"choices": ["not at all", "very poorly", "poorly", "somewhat poorly", "somewhat", "moderately", "somewhat well", "well", "very well", "perfectly"], "choice_numbers": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]}}.
Your score should in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], and explain why you give this score in one sentence. The reason should be specific to the party's stance and the background information.
Please respond in the following JSON format:
{{"explanation": "<one sentence explanation>","score": <choice_number>}}
"""

# Verbal labels for the ten score levels; the list index is the numeric score
# (0 = "not at all" ... 9 = "perfectly"). The same labels are spelled out inside
# the two *_EVAL_PROMPT_TEMPLATE strings, so keep them in sync by hand.
# NOTE(review): not referenced by the code visible in this file — confirm
# whether another module imports it before removing.
CHOICE_LIST = ["not at all", "very poorly", "poorly", "somewhat poorly", "somewhat", "moderately", "somewhat well", "well", "very well", "perfectly"]

# Statement + scoring rubric for the feasibility judgement. This text is
# formatted first and then embedded into FEASIBILITY_EVAL_PROMPT_TEMPLATE.
# str.format fields: {background}, {resolution}, {party_name}, {stance}.
# Doubled braces (e.g. {{Begin of the background}}) are format escapes and
# render as single literal braces that delimit each section for the model.
feasibility_judge_template = """Background: 
{{Begin of the background}}
'''{background}'''
{{End of the background}}
Consider the following statement: 
{{Begin of the resolution}}
'''{resolution}'''
{{End of the resolution}}
The {party_name}'s opinion is: 
{{Begin of the stance}}
'''{stance}'''
{{End of the stance}}
To what extent is this resolution feasible based on the background and {party_name}'s stance? **You don't need to evaluate if the resolution aligns with the stance, you should only evaluate the feasibility of the resolution itself.**

Please follow this scoring guideline:
- **Score 0-3**: If the resolution is largely disconnected from the background and stance, proposes clearly unfeasible measures, or if there are contradictions/inconsistencies in the resolution, or if it ignores practical constraints (resources, timeframe, policy frameworks, international context), provides only idealistic or unrealistic goals without actionable pathways. IF THE CONTENT IS EVEN NOT IN A RESOLUTION FORMAT, YOU SHOULD GIVE 0 DIRECTLY.
- **Score 4-6**: If the resolution shows partial consideration of the background and stance but lacks comprehensiveness. The measures are somewhat feasible but the implementation pathway is vague or insufficiently adapted to real-world conditions. It contains minor inconsistencies with the background. The overall direction is correct, but insufficient details reduce practical viability.
- **Score 7-9**: If the resolution fully integrates the background conditions and stance requirements, offering realistic and implementable measures with a clear pathway. For supportive stances, the resolution effectively advances the goals within the existing background. For opposing stances, it addresses and mitigates the concerns raised in the stance, ensuring alignment between the background and party position. It comprehensively covers all key points from the stance, provides solutions to background-related constraints, and demonstrates precise, strong, and highly feasible execution plans."""

# Statement + scoring rubric for the resolution (stance-coverage) judgement.
# This text is formatted first and then embedded into EVAL_PROMPT_TEMPLATE.
# str.format fields: {background}, {resolution}, {party_name}, {stance}.
# Doubled braces (e.g. {{Begin of the stance}}) are format escapes and render
# as single literal braces that delimit each section for the model.
wait_for_judge_statement_template = """Background: 
{{Begin of the background}}
'''{background}'''
{{End of the background}}
Consider the following statement: 
{{Begin of the resolution}}
'''{resolution}'''
{{End of the resolution}}
The {party_name}'s opinion is: 
{{Begin of the stance}}
'''{stance}'''
{{End of the stance}}
To what extent does this resolution provide specific implementation details that capture {party_name}'s stances? **Regardless of whether the stances itself is supportive or opposing to the issue.**

Please follow this scoring guideline:
- **Score 0-3**: If the resolution does not start with "The European Parliament" (except when it only has "assistant" before it), or if the resolution only rephrases content from the stances without providing specific implementation details, omits key points mentioned in the stances, or if it contains elements that weaken/dilute the benefits sought in supportive stances (for opposing stances, if it promotes/strengthens what the party opposes), or it doesn't match the topic about the stances. IF THE CONTENT IS EVEN NOT IN A RESOLUTION FORMAT, YOU SHOULD GIVE 0 DIRECTLY.
- **Score 4-6**: If the resolution provides some feasible implementation details for the stances' requirements but lacks comprehensiveness (e.g. only addresses some aspects, missing some points from the stances) or contains minor conflicts with party interests (e.g. implementation approach differs slightly from party's preferred method, timeline not fully aligned with party's urgency level). The resolution should cover at least half of the key points mentioned in the stances.
- **Score 7-9**: If the resolution provides detailed, concrete and practically feasible implementation measures that fully strengthen and implement supportive stances (for opposing stances, score high if the resolution effectively addresses and resolves the opposition's concerns) without any dilution or compromise. The resolution must comprehensively address ALL points raised in the stances, with higher scores for more detailed coverage of each point."""

def _request_judge_score(prompt):
    """Query the judge model until it returns a usable JSON verdict.

    The model is called with ``prompt``; its reply must be a JSON object with
    an integer-convertible ``"score"`` and an ``"explanation"``. On an
    unusable reply the call is retried with a JSON-format reminder appended
    to the prompt and the temperature raised by 0.1. After more than ten
    unusable replies the fallback score 4 is returned.

    Args:
        prompt: Fully formatted prompt text for the judge model.

    Returns:
        ``(score, explanation)`` — an ``int`` and the explanation value taken
        from the model's JSON reply (or the fallback pair).
    """
    sysprompt = 'You are a helpful assistant that give the most possible choice number based on the demonstrations.'
    extra_prompt = ''
    repeat_count = 0
    tmp_temperature = 0
    while True:
        # Always rebuild from the untouched base prompt so that earlier
        # reminders are not duplicated on every retry.
        raw = gen_chatgpt_outputs(sysprompt=sysprompt, prompt=prompt + extra_prompt, max_token=100, temperature=tmp_temperature)
        try:
            parsed = json.loads(raw)
            # Both fields are read before anything is returned, so a reply
            # with a score but no explanation is rejected as a whole.
            return int(parsed['score']), parsed['explanation']
        except Exception:
            # Any malformed reply is retried (best effort), but
            # KeyboardInterrupt / SystemExit still propagate.
            print(f'{raw} is not in JSON format', file=sys.stderr)

        repeat_count += 1
        if repeat_count > 10:
            # NOTE(review): the pause before giving up is kept from the
            # original code; presumably an API cool-down — confirm.
            time.sleep(5)
            print('Repeat_count > 10, continue', file=sys.stderr)
            # return a round down average score
            return 4, "The response format is incorrect after multiple attempts."
        if repeat_count > 1:
            extra_prompt += '\nYour should respond in JSON format. Now is your turn:'
        else:
            extra_prompt = '\nYou can ONLY RESPONSE in JSON format.'
        tmp_temperature += 0.1


def gpt_single_eval_agent(test_data, eval_text, parl_term, topic_name, party_name, party_stances, args):
    """Score one party's view of a generated resolution with the judge model.

    Two judgements are requested, in this order: the resolution
    (stance-coverage) score and the feasibility score.

    Args:
        test_data: Mapping that provides at least ``'background'`` and
            ``'title'``.
        eval_text: The resolution text to evaluate.
        parl_term: Unused here; kept for interface compatibility.
        topic_name: Unused here; kept for interface compatibility.
        party_name: Name of the party whose stance is judged.
        party_stances: Iterable of stance strings; only entries starting with
            ``party_name`` (optionally preceded by one space) are used.
            NOTE(review): a plain ``str`` would be iterated character by
            character — confirm the caller passes a list.
        args: Unused here; kept for interface compatibility.

    Returns:
        ``(party_name, party_score, party_scores, party_explanations)`` where
        ``party_scores`` is ``[resolution_score, feasibility_score]``,
        ``party_explanations`` holds the matching explanations and
        ``party_score`` is the floor of the mean of the two scores.

    Raises:
        KeyError: If ``test_data`` lacks ``'background'`` or ``'title'``.
    """
    party_stance = '\n\n'.join(
        stance for stance in party_stances
        if stance.startswith((party_name, ' ' + party_name))
    )

    # 'title' is not a field of either statement template; str.format ignores
    # the extra keyword. It is still passed so a missing key fails as before.
    statement_fields = {
        'background': test_data['background'],
        'resolution': eval_text,
        'party_name': party_name,
        'stance': party_stance,
        'title': test_data['title'],
    }

    ## Resolution score
    resolution_prompt = EVAL_PROMPT_TEMPLATE.format(
        wait_for_judge_statement=wait_for_judge_statement_template.format(**statement_fields)
    )
    ## Feasibility score
    feasibility_prompt = FEASIBILITY_EVAL_PROMPT_TEMPLATE.format(
        feasibility_judge_statement=feasibility_judge_template.format(**statement_fields)
    )

    party_scores = []
    party_explanations = []
    for prompt in (resolution_prompt, feasibility_prompt):
        score, explanation = _request_judge_score(prompt)
        party_scores.append(score)
        party_explanations.append(explanation)

    party_score = sum(party_scores) // len(party_scores)
    return party_name, party_score, party_scores, party_explanations

def get_gpt_eval_scores(test_data, eval_text, parl_term, topic_name, party_name_list, args):
    """Evaluate a resolution for every listed party, one worker per party.

    Each party is scored by ``gpt_single_eval_agent`` in a multiprocessing
    pool sized ``min(len(party_name_list), cpu_count())``.

    Args:
        test_data: Mapping with a ``'stances'`` list whose items carry
            ``'party_name'`` and ``'stance'``; it is also forwarded unchanged
            to the per-party evaluator.
        eval_text: The resolution text to evaluate.
        parl_term: Forwarded unchanged to the per-party evaluator.
        topic_name: Forwarded unchanged to the per-party evaluator.
        party_name_list: Names of the parties to evaluate.
        args: Forwarded unchanged to the per-party evaluator.

    Returns:
        ``(all_scores, all_party_scores, all_explanations)`` — three dicts
        keyed by party name holding the combined score, the list of
        individual scores and the list of explanations. All three are empty
        when ``party_name_list`` is empty.

    Raises:
        KeyError: If a party in ``party_name_list`` has no entry in
            ``test_data['stances']``.
    """
    # Pool(0) raises ValueError, so an empty party list is answered directly.
    if not party_name_list:
        return {}, {}, {}

    # The first stance entry whose party_name matches wins for each party.
    party_stances = {}
    for cur_party_name in party_name_list:
        for stance in test_data['stances']:
            if stance['party_name'] == cur_party_name:
                party_stances[cur_party_name] = stance['stance']
                break

    # Build the work items before any worker process is spawned, so that a
    # missing stance fails fast.
    eval_data = [
        (test_data, eval_text, parl_term, topic_name, cur_party_name, party_stances[cur_party_name], args)
        for cur_party_name in party_name_list
    ]

    with Pool(min(len(party_name_list), cpu_count())) as pool:
        results = pool.starmap(gpt_single_eval_agent, eval_data)

    all_scores = {}
    all_party_scores = {}
    all_explanations = {}
    for party_name, score, party_scores, explanation in results:
        all_scores[party_name] = score
        all_party_scores[party_name] = party_scores
        all_explanations[party_name] = explanation

    return all_scores, all_party_scores, all_explanations






        
                

    








