import argparse

from src.eval.toolfuzz.utils.setup import setup_env_vars
from src.eval.toolfuzz.env_prompts import file_system_context, ghub_context, python_source_code_context
from src.eval.toolfuzz.envs.reset_context import DelayResetContext
from src.eval.toolfuzz.utils.tools import get_langchain_tools, get_composio_tools
from src.toolfuzz.correctness.correctness_fuzzer import CorrectnessTester
from src.toolfuzz.tools.info_extractors.tool_wrapper_factory import ToolWrapperFactory
from src.toolfuzz.agent_executors.langchain.react_new import ReactAgentNew
from src.toolfuzz.result_classes import Budget

"""
One test is one prompt template -> this is a bunch of questions
1. Evaluate all the questions
2. Put them in buckets for output (output bucket)
3. Put them in buckets for input arguments (argument bucket)

For now save each record
"""


def args():
    """Parse the command-line options of this script.

    Recognised flags:
        -am  value stored as ``agent_model``
        -pm  value stored as ``prompt_model``
        -t   value stored as ``tool``
        -l   boolean switch stored as ``langchain`` (False when absent)

    Returns:
        The ``argparse.Namespace`` produced from ``sys.argv``.
    """
    cli = argparse.ArgumentParser(description="Test agent tools.")

    # The three value-carrying flags differ only in their flag/destination pair.
    value_flags = (
        ('-am', 'agent_model'),
        ('-pm', 'prompt_model'),
        ('-t', 'tool'),
    )
    for flag, destination in value_flags:
        cli.add_argument(flag, dest=destination)

    cli.add_argument('-l', action='store_true', dest='langchain')
    return cli.parse_args()


# Parsed once when the module is loaded; test_tool() and main() read their
# settings (tool, agent_model, prompt_model, langchain) from this namespace.
cl_args = args()


# Tool names that get the file-system prompt context.
_FILE_SYSTEM_TOOL_NAMES = frozenset({
    'copy_file', 'file_delete', 'file_search', 'move_file', 'read_file', 'write_file',
    'list_directory', 'terminal',
})

# Tool names that get the GitHub prompt context.
_GITHUB_TOOL_NAMES = frozenset({
    'Get_Issues', 'Get_Issue', 'Comment_on_Issue', 'List_open_pull_requests__PRs_',
    'Get_Pull_Request', 'Overview_of_files_included_in_PR', 'Create_Pull_Request',
    'List_Pull_Requests_Files', 'Create_File', 'Read_File', 'Update_File',
    'Delete_File',
    'Overview_of_existing_files_in_Main_branch',
    'Overview_of_files_in_current_working_branch',
    'List_branches_in_this_repository', 'Set_active_branch', 'Create_a_new_branch',
    'Get_files_from_a_directory', 'Search_issues_and_pull_requests', 'Search_code',
    'Create_review_request',
})


def _select_context(tool_name):
    """Return the additional prompt context for *tool_name*, or '' if none applies."""
    if tool_name in _FILE_SYSTEM_TOOL_NAMES or 'FILETOOL' in tool_name:
        return file_system_context
    if tool_name in _GITHUB_TOOL_NAMES:
        return ghub_context
    if 'CODE' in tool_name:
        return python_source_code_context
    return ''


def test_tool(tools):
    """Run the correctness tester against the tool named by ``cl_args.tool``.

    The first tool in *tools* whose wrapper reports the requested name is
    tested with a ``CorrectnessTester``; the result is saved to
    ``result_<tool>.json`` in the current working directory.

    Args:
        tools: Iterable of tool objects; each is wrapped with
            ``ToolWrapperFactory.create_extractor`` to obtain its name.

    Raises:
        ValueError: If no tool in *tools* has the requested name.
    """
    tool_to_test = None
    tool_wrapper = None
    for candidate in tools:
        candidate_wrapper = ToolWrapperFactory.create_extractor(candidate)
        if candidate_wrapper.get_tool_name() == cl_args.tool:
            tool_to_test, tool_wrapper = candidate, candidate_wrapper
            break

    # Explicit raise instead of `assert`: assertions are removed under `python -O`,
    # which would let a missing tool slip through to the code below.
    if tool_to_test is None:
        raise ValueError(f"Tool {cl_args.tool} not found in the provided tools")

    context = _select_context(tool_wrapper.get_tool_name())

    agent = ReactAgentNew(tool_to_test, cl_args.agent_model)
    # Five-minute time limit, with token limits sized to roughly match it
    # (per the original author's note).
    budget = Budget(time_limit=300, agent_token_limit=25_000, prompt_token_limit=25_000,
                    agent_cost_limit=0.1, prompt_cost_limit=0.15)

    tester = CorrectnessTester(llm=cl_args.prompt_model, tool=tool_to_test,
                               additional_context=context, agent=agent, budget=budget,
                               context_resetter=DelayResetContext(tool_wrapper))
    tester.test()
    tester.save(f'result_{cl_args.tool}.json')


def main():
    """Test the requested tool against the selected tool collection.

    Uses the tools from ``get_langchain_tools()`` when the ``-l`` switch was
    given, otherwise the tools from ``get_composio_tools()``.
    """
    load_tools = get_langchain_tools if cl_args.langchain else get_composio_tools
    test_tool(load_tools())


if __name__ == '__main__':
    # Environment setup runs first, before any tools are loaded or tested.
    setup_env_vars()
    main()
