import json

import sys
import os
from benchmark.api import BenchmarkManager

from pipeline.utils import get_user_input, get_user_input_multiline, print_colored

# Benchmark file IDs that main() works through; add further IDs here, one per line.
file_ids = [
    "cf526c56-48be-4ca3-b1d0-eb2d8100ed05",
]


# Hint levels walked for every file: -1 means "no hint" (the literal string
# "None" is substituted); 0, 1 and 2 index into the list from get_hints().
_HINT_LEVELS = range(-1, 3)

# Maps the one-letter rating typed by the operator to the stored label.
_RATING_LABELS = {"f": "failed", "p": "partially", "s": "success"}

# Text shown when asking the operator to rate an answer.
_RATING_PROMPT = (
    "According to the standard here: \n"
    "        - failed: The agent failed to spot the issue, or just point out a general issue relevant to the hint without mentioning any correct context relevant to involved files' context in <issue>.\n"
    "        - partially: The agent spotted the issue in <issue> and provide correct context evidence which aligns with the context in <issue>, but it failed to caputure all issues and point out all context evidence.\n"
    '        - success: The agent not only spotted all the issue but also provided relevant evidence relevant to "involved" field in <issue>. Even if it raises other irrelevant issues. Once it mentions the context, it should be rated as success.\n'
    "\n"
    "        Rate the agent's answer (f, p, s): "
)


def _read_text(path):
    """Return the whole content of the UTF-8 text file at *path*."""
    with open(path, "r", encoding="utf-8") as f:
        return f.read()


def _write_text(path, text):
    """Write *text* to *path* as UTF-8, replacing any existing content."""
    with open(path, "w", encoding="utf-8") as f:
        f.write(text)


def _normalize_issue(issue):
    """Turn literal ``\\n`` sequences into newlines and drop remaining backslashes."""
    return issue.replace(r'\n', '\n').replace('\\', "")


def _hint_for_level(hints, hint_level):
    """Return the hint text for *hint_level*; level -1 yields the string "None".

    Assumes *hints* holds at least three entries (levels 0-2); an IndexError
    propagates otherwise.
    """
    if hint_level == -1:
        return "None"
    return hints[hint_level]


def _fill_template(template, answer, issue, hint):
    """Substitute the <ANSWER>, <ISSUE> and <HINT> placeholders in *template*."""
    return template.replace("<ANSWER>", answer).replace("<ISSUE>", issue).replace("<HINT>", hint)


def _show_task(scenario, file_id, hint_level, issue, hint):
    """Print the header, the issue text and the hint for the current step."""
    print_colored(f"Scenario: {scenario} | File ID: {file_id} | hint Level: {hint_level}", 'yellow')
    print("\n")
    print(issue)
    print("\n")

    print("\n")
    print("hint is: ", hint)
    print("\n")


def _ask_human_rating():
    """Prompt until the operator enters f, p or s; return the matching label.

    Returns:
        One of "failed", "partially" or "success".
    """
    while True:
        choice = input(_RATING_PROMPT).strip().lower()
        if choice in _RATING_LABELS:
            return _RATING_LABELS[choice]
        print("Invalid input. Please enter a valid rating.")


def _append_record(record, data_path="evaluator_test/data.json"):
    """Append *record* to the "datas" list stored in the JSON file at *data_path*.

    The file is created with an empty list when it does not exist yet.
    """
    if os.path.exists(data_path):
        with open(data_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    else:
        data = {"datas": []}

    data["datas"].append(record)

    with open(data_path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=4)


def _run_batch(manager, base_path, available_file_ids):
    """Walk every file ID and hint level, saving answers under pipeline/output."""
    print_colored("Choose the webpage model to evaluate", 'yellow')
    model = get_user_input("Enter the model name: ")
    save_dir = f"pipeline/output/webpage-{model}"

    # The template does not change between iterations, so read it once.
    metrics_template = _read_text("prompts/evaluate_with_metrics.txt")

    for file_id in available_file_ids:
        scenario = manager.get_issue_scenario(file_id=file_id)
        issue = manager.get_issue_context(file_id=file_id)
        save_path = "{}/{}/{}".format(save_dir, scenario, file_id)
        os.makedirs(save_path, exist_ok=True)
        # issue.txt keeps the raw text; normalisation happens afterwards.
        _write_text("{}/issue.txt".format(save_path), issue)

        hints = manager.get_hints(file_id=file_id)
        issue = _normalize_issue(issue)

        for hint_level in _HINT_LEVELS:
            hint = _hint_for_level(hints, hint_level)
            _show_task(scenario, file_id, hint_level, issue, hint)

            print_colored(f"Please find the file in {base_path}/{scenario}/files/{file_id}/", 'cyan')
            print_colored(f"Please find the input in {base_path}/{scenario}/input/{file_id}/input_with_hint_{hint_level}.txt", 'cyan')

            answer = get_user_input_multiline("Enter the answer from ChatGPT: ")

            # NOTE: the rating is still requested to keep the operator workflow
            # unchanged, but it is not persisted in this mode (recording to
            # evaluator_test/data.json is only done in single-file mode).
            _ask_human_rating()

            level_dir = "{}/hint_level_{}".format(save_path, hint_level)
            os.makedirs(level_dir, exist_ok=True)
            _write_text(
                "{}/evaluator_input_with_metrics.txt".format(level_dir),
                _fill_template(metrics_template, answer, issue, hint),
            )
            _write_text("{}/answer.txt".format(level_dir), answer)

            print_colored("Completed hint level: " + str(hint_level), 'green')
            print("\n\n")


def _run_single(manager, base_path, available_file_ids):
    """Evaluate one operator-chosen file ID, saving results under evaluator_test."""
    print_colored(f"Available file IDs: {', '.join(available_file_ids)}", 'green')

    file_id = get_user_input("Enter the file ID: ")
    while file_id not in available_file_ids:
        print_colored(f"File ID '{file_id}' not found. Available file IDs are: {', '.join(available_file_ids)}", 'red')
        file_id = get_user_input("Enter the file ID: ")

    # The scenario is derived from the chosen file, exactly as in batch mode.
    scenario = manager.get_issue_scenario(file_id=file_id)
    issue = manager.get_issue_context(file_id=file_id)

    out_dir = "evaluator_test/{}/{}".format(scenario, file_id)
    os.makedirs(out_dir, exist_ok=True)
    # issue.txt keeps the raw text; normalisation happens afterwards.
    _write_text("{}/issue.txt".format(out_dir), issue)

    hints = manager.get_hints(file_id=file_id)
    issue = _normalize_issue(issue)

    # Templates do not change between hint levels, so read them once.
    plain_template = _read_text("prompts/evaluate.txt")
    reflex_template = _read_text("prompts/evaluate_reflex.txt")
    metrics_template = _read_text("prompts/evaluate_with_metrics.txt")

    for hint_level in _HINT_LEVELS:
        hint = _hint_for_level(hints, hint_level)
        _show_task(scenario, file_id, hint_level, issue, hint)

        print_colored(f"Please find the file in {base_path}/{scenario}/files/{file_id}/", 'cyan')
        print_colored(f"Please find the input to ChatGPT in {base_path}/{scenario}/input/{file_id}/input_with_hint_{hint_level}.txt", 'cyan')

        answer = get_user_input_multiline("Enter the answer from ChatGPT: ")
        human_judge = _ask_human_rating()

        prompt = _fill_template(plain_template, answer, issue, hint)
        prompt_reflex = _fill_template(reflex_template, answer, issue, hint)
        prompt_with_metrics = _fill_template(metrics_template, answer, issue, hint)

        level_dir = "{}/hint_level_{}".format(out_dir, hint_level)
        os.makedirs(level_dir, exist_ok=True)
        _write_text("{}/evaluator_input.txt".format(level_dir), prompt)
        _write_text("{}/evaluator_input_reflex.txt".format(level_dir), prompt_reflex)
        _write_text("{}/evaluator_input_with_metrics.txt".format(level_dir), prompt_with_metrics)
        _write_text("{}/answer.txt".format(level_dir), answer)

        _append_record({
            "id": file_id,
            "hint_level": hint_level,
            "scenario": scenario,
            "input": prompt,
            "answer": answer,
            "gt": human_judge,
        })

        print_colored("You can find evaluator input at evaluator_test/{}/{}/hint_level_{}/evaluator_input_with_metrics.txt".format(scenario, file_id, hint_level), 'green')

        gpt4_response = get_user_input_multiline("Enter the GPT-4 evaluator response: ")
        _write_text("{}/evaluator_output_gpt4.txt".format(level_dir), gpt4_response)

        print_colored("Completed hint level: " + str(hint_level), 'green')
        print("\n\n")


def main(loop = True):
    """Run the interactive answer-collection session.

    For each hint level (-1 for "no hint", then 0, 1, 2) the operator is shown
    the issue and hint, pastes the model's answer, and rates it. Evaluator
    prompt files are generated from the templates in ``prompts/``.

    Args:
        loop: When true (default), iterate over every ID in the module-level
            ``file_ids`` list and write results under
            ``pipeline/output/webpage-<model>/``. When false, ask for a single
            file ID and write results under ``evaluator_test/``, additionally
            recording the human rating in ``evaluator_test/data.json`` and
            storing a pasted GPT-4 evaluator response.
    """
    base_path = 'benchmark/platforms'
    manager = BenchmarkManager()

    # NOTE: scenario-based discovery of file IDs is currently disabled; the
    # module-level ``file_ids`` list is the work queue.
    available_file_ids = file_ids

    if loop:
        _run_batch(manager, base_path, available_file_ids)
    else:
        _run_single(manager, base_path, available_file_ids)

# Script entry point: run main() with its default arguments when this file is
# executed directly (not when it is imported).
if __name__ == "__main__":
    main()
