from __future__ import annotations

import json
from collections.abc import Generator
from copy import deepcopy
from dataclasses import dataclass
from enum import Enum
from app.post_process import ExtractStatus, is_valid_json
from loguru import logger
from typing import Any
from app.log import print_acr
from app.agents.agent_common import InvalidLLMResponse
from app.data_structures import MessageThread, ReproResult
from app.model import common

# `pdb`-based analysis

# Prompt appended to the message thread right after the test feedback. It asks
# the model four diagnostic questions and requires a JSON object with the keys
# Q1..Q4, where Q4 holds `fix_strategy` and `missing_info`.
QUESTION_PROMPT = """To help diagnose and refine the generated code with latest feedback, please carefully answer the following questions. Be objective and analytical.

1. **Failure Identification**  
   In the generated code, which line(s) are most likely responsible for the test failure? What functionality were those lines trying to implement?
   
2. **Context Relevance**  
   Review the code context retrieved so far. Is there any code snippet, usage pattern, or related implementation that performs similar functionality or provides insights into how the target function should be implemented?

   - If yes:
     • Describe how it works and what problem it solves.  
     • Explain how it relates to the current task — e.g., does it show the expected input/output structure, processing logic, or calling pattern?  
     • Highlight whether it handles similar edge cases, dependencies, or class structures.

   - If not:
     • Clearly state what specific kind of context is still missing (e.g., real usage examples, how the return object is constructed, etc.).
     • Suggest which search API(s) (e.g., `search_similar_function`, `search_target_usage_example`) could help retrieve this missing information.

   **Examples of useful context matches**:
   - A function with the same name (e.g., `get_itrs`) implemented in a different class or module, showing a variant behavior.
   - Code that **calls** the target function, which may reveal expected argument types, default behaviors, or object lifecycles.
   - A similar function (e.g., `get_gcrs`) that uses the same inputs (e.g., `obstime`, `location`) and returns a related coordinate object.
   - A test or utility function that builds or transforms the same object type (e.g., `ITRS`, `EarthLocation`) in a different frame or context.

3. **Design Comparison**  
   Does the buggy implementation differ in key design choices compared to the contextual examples (e.g., imports, call patterns, parameter structure, object design)? Are any critical assumptions or contracts violated?

4. **Next Action**  
   Do you have sufficient information to fix the bug?  
   - If you believe no extra context is needed, explain your fix strategy in detail into the "fix_strategy" section below.  
   - If you believe calling more APIs for extra context will help you better debug, clearly describe the missing information and propose which previously introduced search APIs (e.g., `search_relevant_method`, `search_class_in_file`) could help gather it. Put this part into the "missing_info" section below.

Respond using the following JSON format:
{
  "Q1": "...",
  "Q2": "...",
  "Q3": "...",
  "Q4": {
    "fix_strategy": "...", // or null
    "missing_info": "...", // or null
  }
}

Instructions:
- Q1, Q2, and Q3 must be filled — be specific and grounded in available evidence.
- In Q4, only **one** of `fix_strategy` or `missing_info` should be non-null.
- If the test failure's root cause is uncertain, or you keep receiving the same error feedback after code refinement, it is better to request more information than to guess and repeat the error.
- Remember: you have access to multiple code search APIs. Use them when needed.
"""

# - Now you are forced to clarify the missing info and continue the context search. (Delete this afterwards; only for testing.)

def run(
    issue_statement: str,
    patch_content: str,
    test_feedback_save_path: str,
    entire_thread: MessageThread, # the traj from the beginning till here
    retries: int = 3,
) -> tuple[dict, MessageThread, str]:
    """Run the debugger analysis and return its first usable answer.

    Loads the saved test feedback (a JSON file) and hands it, together with
    the other arguments, to ``run_with_retries_debugger``. The attempts that
    generator yields are scanned in order and the first one carrying a
    non-None answer is returned.

    Args:
        issue_statement: forwarded unchanged to ``run_with_retries_debugger``.
        patch_content: forwarded unchanged to ``run_with_retries_debugger``.
        test_feedback_save_path: path of the JSON file holding the test
            feedback entries.
        entire_thread: message thread of the task so far.
        retries: number of attempts the generator is allowed to make.

    Returns:
        ``(answer_json, thread, test_feedback_str)`` of the first attempt
        whose answer is not None.

    Raises:
        InvalidLLMResponse: if every yielded attempt has a None answer.
    """
    with open(test_feedback_save_path, "r") as feedback_file:
        feedback_entries = json.load(feedback_file)

    attempts = run_with_retries_debugger(
        issue_statement,
        patch_content,
        feedback_entries,
        entire_thread,
        retries=retries,
    )

    # TODO: make output dir global, so that the raw responses can be dumped
    for answer_json, thread, feedback_text in attempts:
        if answer_json is None:
            # This attempt produced nothing usable; look at the next one.
            continue
        return answer_json, thread, feedback_text

    raise InvalidLLMResponse(f"failed to review in {retries} attempts")



def run_with_retries_debugger(
    issue_statement: str,
    patch: str,
    test_feedback_list: list,
    entire_thread: MessageThread, # task traj from beginning till here
    retries: int = 5,
) -> Generator[tuple[dict, MessageThread, str], None, None]:
    """Yield debugger answers, one per attempt, until one is usable.

    The test feedback and ``QUESTION_PROMPT`` are appended to
    ``entire_thread`` as two user messages, then the model is queried through
    ``get_response_in_json`` up to ``retries`` times.

    Args:
        issue_statement: not used by this function's body.
        patch: not used by this function's body.
        test_feedback_list: entries indexable by ``"parsed_feedback"``.
        entire_thread: message thread of the task so far; it is extended
            in place with the two prompts.
        retries: maximum number of attempts.

    Yields:
        ``(answer_json, thread, test_str)`` for every attempt. ``answer_json``
        is None for a failed attempt; generation stops after the first
        attempt whose answer is not None.
    """
    # Render every feedback entry as a numbered bullet (numbering starts at 1).
    test_str = "".join(
        f'- Test {number} Feedback:\n{feedback["parsed_feedback"]}\n'
        for number, feedback in enumerate(test_feedback_list, start=1)
    )
    test_prompt = f"Here are the test cases for the target function, along with the feedback after executing them against the generated implementation:\n{test_str}.\n"

    entire_thread.add_user(test_prompt)
    entire_thread.add_user(QUESTION_PROMPT)
    print_acr(msg = test_str, desc = "Test Feedback")
    print_acr(QUESTION_PROMPT, "Debugger Analyze")

    for _ in range(retries):
        raw_response, parsed_answer, entire_thread = get_response_in_json(entire_thread)
        logger.info(raw_response)
        # Every attempt is reported to the consumer, successful or not.
        yield parsed_answer, entire_thread, test_str
        if parsed_answer is not None:
            break



def get_response_in_json(task_traj: MessageThread) -> tuple[Any, dict | None, MessageThread]:
    """Ask the model the fixed debugger questions until it answers in valid JSON.

    The model is queried on ``task_traj.copy()``. Rejected answers and the
    corrective retry messages are added to that copy only; ``task_traj``
    itself receives just the accepted answer.

    Args:
        task_traj (MessageThread): entire task thread asking the 4 questions.

    Returns:
        A ``(response, parsed, task_traj)`` tuple.

        * On success, ``response`` is the raw model text and ``parsed`` is the
          dict decoded from it.
        * If none of the 5 attempts is accepted, ``parsed`` is None and
          ``response`` is the last rejected raw text. Returning a tuple here
          matters: the caller unpacks three values and checks ``parsed`` for
          None, so an implicit bare ``None`` return would raise ``TypeError``
          on unpacking instead of triggering the caller's retry path.
    """
    max_attempts = 5
    task_traj_copy = task_traj.copy()
    res_text = None

    for _ in range(max_attempts):
        res_text, *_ = common.SELECTED_MODEL.call(task_traj_copy.to_msg(),
                                                  response_format="json_object")

        # First gate: is the text JSON at all? Second gate: does it have the
        # Q1..Q4 shape the prompt asks for?
        extract_status, data = is_valid_json(res_text)
        if extract_status != ExtractStatus.IS_VALID_JSON:
            error_info = "Your returned response can't be converted into JSON. Please retry."
        else:
            is_valid, diagnosis = is_valid_debugger_response(data)
            if is_valid:
                logger.debug("In debugger: extracted a valid json.")
                task_traj.add_model(res_text)
                return res_text, json.loads(res_text), task_traj
            error_info = f"Your json response doesn't satisfy the following requirement: {diagnosis}. Please retry."

        # Rejected: show the model its own answer plus the reason, on the
        # scratch copy only, and ask again.
        print(error_info)
        logger.debug(error_info)
        task_traj_copy.add_model(res_text)
        task_traj_copy.add_user(error_info)

    # All attempts were rejected; signal failure explicitly to the caller.
    logger.debug(f"In debugger: no valid json after {max_attempts} attempts.")
    return res_text, None, task_traj



def is_valid_debugger_response(data: Any) -> tuple[bool, str]:
    """Check that ``data`` has the shape of a debugger answer.

    A valid answer is a dict with string values under ``Q1``, ``Q2`` and
    ``Q3``, and a dict under ``Q4`` holding both ``fix_strategy`` and
    ``missing_info``. Each of those two is a string or None, and exactly one
    of them is truthy.

    Args:
        data: the decoded JSON value to check.

    Returns:
        ``(True, "OK")`` when valid, otherwise ``(False, reason)`` where
        ``reason`` describes the first violated rule.
    """
    if not isinstance(data, dict):
        return False, "Response is not a dict"

    required_keys = {"Q1", "Q2", "Q3", "Q4"}
    if not required_keys.issubset(data.keys()):
        return False, f"Missing required keys. Required: {required_keys}, but got: {set(data.keys())}"

    # The three free-text answers must all be strings.
    for question in ("Q1", "Q2", "Q3"):
        if not isinstance(data[question], str):
            return False, f"Answers for {question} must be a string"

    next_action = data["Q4"]
    if not isinstance(next_action, dict):
        return False, "Q4 must be a dictionary"

    action_fields = ("fix_strategy", "missing_info")
    if not all(field in next_action for field in action_fields):
        return False, "Q4 must contain both 'fix_strategy' and 'missing_info'"

    for field in action_fields:
        value = next_action[field]
        if value is not None and not isinstance(value, str):
            return False, f"Q4['{field}'] must be a string or null"

    # Exactly one of the two must carry content (None and "" both count as empty).
    has_strategy = bool(next_action["fix_strategy"])
    has_missing = bool(next_action["missing_info"])
    if has_strategy and has_missing:
        return False, "Only one of 'fix_strategy' or 'missing_info' should be non-empty"
    if not (has_strategy or has_missing):
        return False, "Either 'fix_strategy' or 'missing_info' must be provided"

    return True, "OK"



# Running this module as a script currently does nothing. The commented-out
# lines after this guard are a disabled manual test harness; note that it calls
# `run_with_retries`, a name that is not defined in the visible part of this file.
if __name__ == "__main__":
    pass

#     # setup before test

#     register_all_models()
#     common.set_model("gpt-4-0125-preview")

#     # TEST
#     instance_id = "matplotlib__matplotlib-23299"

#     problem_stmt = "[Bug]: get_backend() clears figures from Gcf.figs if they were created under rc_context\n### Bug summary\r\n\r\ncalling `matplotlib.get_backend()` removes all figures from `Gcf` if the *first* figure in `Gcf.figs` was created in an `rc_context`.\r\n\r\n### Code for reproduction\r\n\r\n```python\r\nimport matplotlib.pyplot as plt\r\nfrom matplotlib import get_backend, rc_context\r\n\r\n# fig1 = plt.figure()  # <- UNCOMMENT THIS LINE AND IT WILL WORK\r\n# plt.ion()            # <- ALTERNATIVELY, UNCOMMENT THIS LINE AND IT WILL ALSO WORK\r\nwith rc_context():\r\n    fig2 = plt.figure()\r\nbefore = f'{id(plt._pylab_helpers.Gcf)} {plt._pylab_helpers.Gcf.figs!r}'\r\nget_backend()\r\nafter = f'{id(plt._pylab_helpers.Gcf)} {plt._pylab_helpers.Gcf.figs!r}'\r\n\r\nassert before == after, '\\n' + before + '\\n' + after\r\n```\r\n\r\n\r\n### Actual outcome\r\n\r\n```\r\n---------------------------------------------------------------------------\r\nAssertionError                            Traceback (most recent call last)\r\n<ipython-input-1-fa4d099aa289> in <cell line: 11>()\r\n      9 after = f'{id(plt._pylab_helpers.Gcf)} {plt._pylab_helpers.Gcf.figs!r}'\r\n     10 \r\n---> 11 assert before == after, '\\n' + before + '\\n' + after\r\n     12 \r\n\r\nAssertionError: \r\n94453354309744 OrderedDict([(1, <matplotlib.backends.backend_qt.FigureManagerQT object at 0x7fb33e26c220>)])\r\n94453354309744 OrderedDict()\r\n```\r\n\r\n### Expected outcome\r\n\r\nThe figure should not be missing from `Gcf`.  Consequences of this are, e.g, `plt.close(fig2)` doesn't work because `Gcf.destroy_fig()` can't find it.\r\n\r\n### Additional information\r\n\r\n_No response_\r\n\r\n### Operating system\r\n\r\nXubuntu\r\n\r\n### Matplotlib Version\r\n\r\n3.5.2\r\n\r\n### Matplotlib Backend\r\n\r\nQtAgg\r\n\r\n### Python version\r\n\r\nPython 3.10.4\r\n\r\n### Jupyter version\r\n\r\nn/a\r\n\r\n### Installation\r\n\r\nconda\n"

#     test = """# reproducer.py
# import matplotlib.pyplot as plt
# from matplotlib import get_backend, rc_context

# def main():
#     # Uncommenting either of the lines below would work around the issue
#     # fig1 = plt.figure()
#     # plt.ion()
#     with rc_context():
#         fig2 = plt.figure()
#     before = f'{id(plt._pylab_helpers.Gcf)} {plt._pylab_helpers.Gcf.figs!r}'
#     get_backend()
#     after = f'{id(plt._pylab_helpers.Gcf)} {plt._pylab_helpers.Gcf.figs!r}'

#     assert before == after, '\n' + before + '\n' + after

# if __name__ == "__main__":
#     main()
# """

#     patch = """diff --git a/lib/matplotlib/__init__.py b/lib/matplotlib/__init__.py
# index c268a56724..b40f1246b9 100644
# --- a/lib/matplotlib/__init__.py
# +++ b/lib/matplotlib/__init__.py
# @@ -1087,7 +1087,9 @@ def rc_context(rc=None, fname=None):
#               plt.plot(x, y)  # uses 'print.rc'

#      \"\"\"
# +    from matplotlib._pylab_helpers import Gcf
#      orig = rcParams.copy()
# +    orig_figs = Gcf.figs.copy()  # Preserve the original figures
#      try:
#          if fname:
#              rc_file(fname)
# @@ -1096,6 +1098,7 @@ def rc_context(rc=None, fname=None):
#          yield
#      finally:
#          dict.update(rcParams, orig)  # Revert to the original rcs.
# +        Gcf.figs.update(orig_figs)  # Restore the original figures


#  def use(backend, *, force=True):"""

#     # run_with_retries(problem_stmt, test, patch)

#     success = False

#     for attempt_idx, (raw_response, thread, review_result) in enumerate(
#         run_with_retries(problem_stmt, test, patch), start=1
#     ):

#         success |= review_result is not None

#         # dump raw results for debugging
#         Path(f"agent_reviewer_raw_{attempt_idx}.json").write_text(
#             json.dumps(thread.to_msg(), indent=4)
#         )

#         if success:
#             print(f"Success at attempt {attempt_idx}. Review result is {review_result}")
#             break

#     if not success:
#         print("Still failing to produce valid review results after 5 attempts")
