import gzip
import json
import logging
import os
import pickle
import random
import re
from datetime import datetime
from typing import Any, Dict, List, Optional
from collections import defaultdict

import bgym
import pandas as pd
from agentlab.llm.response_api import MessageBuilder

from jephhinter.configs import HintPromptConfig, HintsMinerConfig, JephHinterConfig
from jephhinter.utils.utils import extract_structured_response

# Module-level logger named after this module. Its level is explicitly set to
# INFO at import time, so info-level messages from this file pass this logger's
# own level check.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


class SimpleDiscussion:
    """Ordered container for the messages that make up one LLM prompt.

    Messages are stored in insertion order in ``self.messages``.
    """

    def __init__(self):
        # Start empty; callers add messages one at a time through append().
        self.messages: List[MessageBuilder] = list()

    def append(self, message: MessageBuilder):
        """Add ``message`` to the end of the discussion."""
        self.messages.append(message)

    def flatten(self) -> List[MessageBuilder]:
        """Return the stored message list itself (the live list, not a copy)."""
        return self.messages


def load_all_step_pickles(root_dir):
    """Load every ``step_*.pkl.gz`` file found under ``root_dir``.

    Directories are visited in sorted order and, inside each directory, step
    files are ordered by the number in their name (``step_2`` before
    ``step_10``). ``os.walk`` gives no ordering guarantee on its own, and a
    plain string sort would misplace two-digit steps.

    Args:
        root_dir: Directory that is searched recursively.

    Returns:
        A list of ``{"file": path, "data": unpickled_object}`` dicts, one per
        file that could be loaded. Files that fail to load are logged and
        skipped.
    """

    def _step_order(fname):
        # Numeric step index first; names without a number sort last by name.
        match = re.search(r"step_(\d+)", fname)
        return (match is None, int(match.group(1)) if match else 0, fname)

    step_data = []

    for dirpath, dirnames, filenames in os.walk(root_dir):
        # Sorting dirnames in place makes os.walk descend in a stable order.
        dirnames.sort()
        step_files = [
            fname
            for fname in filenames
            if fname.startswith("step_") and fname.endswith(".pkl.gz")
        ]
        for fname in sorted(step_files, key=_step_order):
            fpath = os.path.join(dirpath, fname)
            try:
                with gzip.open(fpath, "rb") as f:
                    # NOTE: unpickling can execute arbitrary code; only point
                    # this loader at trace folders you trust.
                    data = pickle.load(f)
                step_data.append({"file": fpath, "data": data})
            except Exception as e:
                # Best-effort loader: one unreadable step must not abort the scan.
                logger.warning(f"Failed to load {fpath}: {e}")
    return step_data


def extract_trace_info(step_data):
    """Extract the per-step fields used for guideline generation.

    Args:
        step_data: Iterable of ``{"file": path, "data": step_object}`` dicts.
            Step objects lacking an ``obs`` or ``agent_info`` attribute are
            skipped.

    Returns:
        A list with one dict per usable step, holding the keys
        ``axtree_txt``, ``think``, ``action``, ``error``, ``goal``, ``reward``
        and ``step_file``. Fields that cannot be read fall back to ``""``
        (``[]`` for ``goal``, ``0`` for ``reward``).
    """
    trace_info = []
    for step in step_data:
        step_info = step["data"]
        if not (hasattr(step_info, "obs") and hasattr(step_info, "agent_info")):
            continue

        # Treat a missing/non-dict observation as empty so every lookup below
        # is guarded the same way (the axtree lookup used to crash here).
        obs = step_info.obs if isinstance(step_info.obs, dict) else {}

        # The agent's reasoning may live on an object attribute or a dict key.
        agent_info = step_info.agent_info
        if isinstance(agent_info, dict):
            think = agent_info.get("think", "")
        else:
            think = getattr(agent_info, "think", "")

        trace_info.append(
            {
                "axtree_txt": obs.get("axtree_txt", ""),
                "think": think,
                "action": getattr(step_info, "action", ""),
                "error": obs.get("last_action_error", ""),
                "goal": obs.get("goal", []),
                "reward": getattr(step_info, "reward", 0),
                "step_file": step["file"],
            }
        )

    return trace_info


def get_status(observation_text, url, logger, method='self', llm=None, msg_builder=None, seen_summarizations=None):
    """Summarize the current page into a generic state category using the LLM.

    Args:
        observation_text: Accessibility-tree text of the current page.
        url: URL of the current page.
        logger: Logger that receives a debug message with the generated summary.
        method: Unused; kept so existing call sites keep working.
        llm: Callable that takes an ``APIPayload`` and returns the model
            response. When ``None`` no model call is made.
        msg_builder: Builder providing ``system()`` and ``user()`` messages.
            When ``None`` no model call is made.
        seen_summarizations: Summaries produced so far. A newly generated
            summary is appended to this list in place.

    Returns:
        A ``(summarization, seen_summarizations)`` tuple on every path. Without
        an LLM or message builder the summary is the fixed string
        ``"On a web page"`` and the list is returned unchanged.
    """
    if seen_summarizations is None:
        seen_summarizations = []

    # Fallback when no model is available. This must be a tuple as well,
    # because every caller unpacks two values from the result.
    if llm is None or msg_builder is None:
        return "On a web page", seen_summarizations

    # Create the prompt
    system_prompt = """You are an autonomous intelligent agent tasked with navigating a web browser. You will be provided with the following information:
1. A list of context summarizations you have seen in the past.
2. A snippet of the current web page's accessibility tree: a simplified representation of the webpage with key information and the current web pages' URL.

Please generate the summarization of the current observation after 'SUMMARIZATION:'.

Here are some requirements:
Requirement 1: Different types of webpages should have clearly different summarizations. For example, for GitHub there can be the main page of GitHub, the overview page of a GitHub user, the issues page of a GitHub repository, the search result page of GitHub. etc, you should clearly categorize those and make sure not to mix them up.

Requirement 2: Important: The summarization should be general, and concise, without any user/object/task specific information, instead, websites that fall into the same categories should have the same summarization, for example, the main page of every reddit forum should be categorized as the same context summarization: On the main page of a Reddit forum. You should never include the specific name of the forum in the summarization.

Requirement 3: The URLs will be very useful for you to determine the summarization.

Requirement 4: If the context is the same as one from the seen list, directly copy the best matching one word by word."""

    # Format seen summarizations as a numbered list (1-based).
    if seen_summarizations:
        seen_summarizations_text = "Here are the context summarizations you have seen in the past:\n" + "".join(
            f"{i}. {summary}\n" for i, summary in enumerate(seen_summarizations, 1)
        )
    else:
        seen_summarizations_text = "No previous context summarizations available."

    user_prompt = f"""1. List of context summarizations you have seen in the past:
{seen_summarizations_text}

2. Current web page's accessibility tree:
{observation_text}

3. Current web page's URL:
{url}

SUMMARIZATION:"""

    # Create the discussion and get response
    discussion = SimpleDiscussion()
    discussion.append(msg_builder.system().add_text(system_prompt))
    discussion.append(msg_builder.user().add_text(user_prompt))

    from agentlab.llm.response_api import APIPayload
    payload = APIPayload(messages=discussion.flatten())
    response = llm(payload)

    # Use the response's `think` text when present, else its string form.
    answer = response.think if hasattr(response, "think") else str(response)

    # Keep only the text after the last "SUMMARIZATION:" marker, if any.
    if "SUMMARIZATION:" in answer:
        summarization = answer.split("SUMMARIZATION:")[-1].strip()
    else:
        summarization = answer.strip()

    # Collapse to a single line so summaries can be compared by equality.
    summarization = summarization.replace("\n", " ").strip()
    if summarization not in seen_summarizations:
        seen_summarizations.append(summarization)
    logger.debug(f"Generated summarization: {summarization}")

    return summarization, seen_summarizations


def action_same_state(demo_trajectory, start_step, end_step, curr_sum, logger, llm=None, msg_builder=None, seen_summarizations=None):
    """Describe the neighbouring demonstration steps that stay in the same state.

    ``demo_trajectory`` is read as alternating entries: index ``2 * step`` is
    an observation record (``['observation']['text']`` and
    ``['info']['page'].url`` are used) and index ``2 * step + 1`` is the action
    taken at that step.

    Args:
        demo_trajectory: Alternating observation/action sequence.
        start_step: Reference step; future steps start at ``start_step + 1``.
        end_step: ``None`` to collect future steps. Otherwise previous steps
            are collected, scanning backwards from ``end_step - 1``.
        curr_sum: State summary that neighbouring steps must match.
        logger, llm, msg_builder, seen_summarizations: Forwarded to
            ``get_status`` to summarize each neighbouring observation.

    Returns:
        The concatenated step descriptions in chronological order, or the
        string ``'None'`` when no neighbouring step shares the state. Only the
        contiguous run of matching steps next to the reference step is used.
        Previous steps are numbered 1..k from earliest to latest.
    """
    num_steps = len(demo_trajectory) // 2

    def _summarize(step):
        # One get_status call per step: each call is an LLM request.
        obs = demo_trajectory[2 * step]['observation']['text']
        url = demo_trajectory[2 * step]['info']['page'].url
        summary, _ = get_status(obs, url, logger, method='self', llm=llm, msg_builder=msg_builder, seen_summarizations=seen_summarizations)
        return obs, url, summary

    entries = []

    if end_step is None:
        # Future steps on the same page, stopping at the first state change.
        for step in range(start_step + 1, num_steps):
            obs, next_url, next_sum = _summarize(step)
            if next_sum != curr_sum:
                break
            act = demo_trajectory[2 * step + 1]  # action follows observation
            offset = step - start_step
            entries.append(
                f'Observation for Future step {offset}:\n{obs}\nURL:{next_url}\nDemonstration action for Future step {offset}: {act}\n'
            )
    else:
        # Previous steps on the same page: walk backwards once and stop at the
        # first state change, then number the run from the earliest step.
        matched = []
        for step in reversed(range(min(end_step, num_steps))):
            obs, next_url, next_sum = _summarize(step)
            if next_sum != curr_sum:
                break
            matched.append((obs, next_url, demo_trajectory[2 * step + 1]))
        matched.reverse()
        for number, (obs, next_url, act) in enumerate(matched, 1):
            entries.append(
                f'Observation for Previous step {number}:\n{obs}\nURL:{next_url}\nDemonstration action for Previous step {number}: {act}\n'
            )

    return ''.join(entries) or 'None'


def identical_action(act1, act2):
    """Return True when two actions should be treated as the same action.

    Two strings are compared after replacing single quotes with double quotes.
    Two non-string actions are compared on a fixed set of keys only, so any
    other keys (such as typed content) are ignored. A string compared with a
    non-string is never identical.
    """
    first_is_text = isinstance(act1, str)
    second_is_text = isinstance(act2, str)

    if first_is_text and second_is_text:
        # Quote style must not affect the comparison.
        return act1.replace("'", '"') == act2.replace("'", '"')

    if first_is_text or second_is_text:
        return False

    compared_keys = ('action_type', 'element_role', 'element_name', 'page_number', 'element_id')
    return not any(act1[key] != act2[key] for key in compared_keys)

# Helper that parses the <think>, <topic> and <guideline> tags out of an LLM response.
def _extract_think_and_guideline(response_text):
    """
    Extract think and guideline from LLM response that contains <think> and <guideline> tags.
    Returns both the thinking part and the guideline part.
    """
    import re
    
    # Look for <think> tags
    think_pattern = r'<think>(.*?)</think>'
    think_match = re.search(think_pattern, response_text, re.DOTALL)
    
    # Look for <guideline> tags - handle both complete and incomplete tags
    guideline_pattern = r'<guideline>(.*?)</guideline>'
    guideline_match = re.search(guideline_pattern, response_text, re.DOTALL)
    
    think = think_match.group(1).strip() if think_match else ""
    
    if guideline_match:
        # Complete <guideline>...</guideline> found
        guideline = guideline_match.group(1).strip()
    else:
        # Check if there's an opening <guideline> tag without closing tag
        guideline_start_pattern = r'<guideline>(.*)'
        guideline_start_match = re.search(guideline_start_pattern, response_text, re.DOTALL)
        if guideline_start_match:
            # Take everything after <guideline> as the guideline
            guideline = guideline_start_match.group(1).strip()
        else:
            guideline = ""
    
    # Add topic extraction:
    topic_pattern = r'<topic>(.*?)</topic>'
    topic_match = re.search(topic_pattern, response_text, re.DOTALL)
    topic = topic_match.group(1).strip() if topic_match else ""
    
    return think, guideline, topic

def gen_guideline(observation, summarization, predicted_action, demo_action, guidelines, later_actions, prev_actions, llm, msg_builder, guideline_topics=None):
    """Generate a guideline by comparing a failed prediction with the correct action.

    Args:
        observation: Observation text. Anything from the marker
            ``'STATE SUMMARIZATION: '`` onwards is dropped before prompting.
        summarization: State summary of the current page; used as the key under
            which the guideline is counted.
        predicted_action: The (wrong) action the agent produced, as text.
        demo_action: The demonstrated (correct) action, as text.
        guidelines: Mapping ``summarization -> {guideline: [count, 0]}``. It is
            updated in place and also returned.
        later_actions: Text describing later demonstration steps on the same page.
        prev_actions: Text describing earlier steps on the same page.
        llm: Callable that takes an ``APIPayload`` and returns the model response.
        msg_builder: Builder providing ``system()`` and ``user()`` messages.
        guideline_topics: Optional list; the extracted topic is appended to it
            when one is found.

    Returns:
        The ``guidelines`` mapping. It is returned unchanged when the model
        produced no usable guideline text at all.
    """
    observation = observation.split('STATE SUMMARIZATION: ')[0]
    
    # Create the system message with instructions
    system_prompt = """You are an autonomous intelligent agent tasked with navigating a web browser. At each time step, you need to generate one action given the current observation. 

# Action space:
Note: This action set allows you to interact with your environment. Most of them
are python function executing playwright code. The primary way of referring to
elements in the page is through bid which are specified in your observations.


12 different types of actions are available.

noop(wait_ms: float = 1000)
scroll(delta_x: float, delta_y: float)
fill(bid: str, value: str)
select_option(bid: str, options: str | list[str])
click(bid: str, button: Literal['left', 'middle', 'right'] = 'left', modifiers: list[typing.Literal['Alt', 'Control', 'ControlOrMeta', 'Meta', 'Shift']] = [])
dblclick(bid: str, button: Literal['left', 'middle', 'right'] = 'left', modifiers: list[typing.Literal['Alt', 'Control', 'ControlOrMeta', 'Meta', 'Shift']] = [])
hover(bid: str)
press(bid: str, key_comb: str)
focus(bid: str)
clear(bid: str)
drag_and_drop(from_bid: str, to_bid: str)
send_msg_to_user(text: str)
Only a single action can be provided at once. Example:
fill('a12', 'example with "quotes"')


You just finished a task but failed. For this failed task, we provide a human demonstration for you. Please compare the demonstration action with your generated action at each timestep, reason about the intention of the correct action, and then generate an action guideline for future tasks to avoid the same mistake and make the future tasks successful. 

Based on the information provided, please generate a short and concise guideline that guides you to issue the correct action. Here are some requirements: 
REQUIREMENT 1: (IMPORTANT!!!) The guideline should be general enough to generalize to all similar tasks, not only this task. Therefore do not include any task-specific information in your guideline, for example a user name, a specific forum or biography, the specific text you want to enter, or any number ID in [] in front of each element, for example [123], the numbers are randomly generated therefore never include them in your guideline.
However, for other non-specific elements like "link 'Forums'" or "button 'Create submission'", you should specifically include them in the EXACT TEXT in your guideline, including both the type(such as button/link/textbox) and the name(for example 'Create submission').
You should also describe the position of the element of interest, for example: click on the "link 'Forums'", which locates at the early part of the observation, right above link 'Wiki', with action ```click [link_id]```.

REQUIREMENT 2: When referring to a url in your guideline, specify it as detailed as possible, only replace the task specific information as a placeholder, for example, replace a forum name with <forum_name> and specify the url in full, starts with http://. 

REQUIREMENT 3: 
The guideline should be less than 256 tokens, and in this format: "when in [what status], if you want to (but ...), (after you have ...), you can ... The correct action format to do this is ```...```".
For the "what status" part, use the exact words in "A summarization of the current context". 
For the "if you want to ..." and "you can ... " part, please refer to "the previous actions" and "Demonstration actions in later steps" to generate more accurate descriptions of your purpose and the sequence of actions to achieve the purpose. In the end you should write out the correct action format in ``````, for example ```type [id] [content] [1]```.

REQUIREMENT 4: 
VERY IMPORTANT: If the "Previous Actions on the same page" or  "Demonstration actions in later timesteps on the same page" is not empty, MAKE SURE to emphasize the order of the actions, do not miss any single action, and put them in step a. step b. step c. ..., for example 'if you want to ...　(but ...), you should do these sequentially: step a. ... the correct action format to do this is ```...```, step b. ... the correct action format to do this is ```...``` step c. ... the correct action format to do this is ```...```" YOU MUST list "5. Previous Actions on the same page", "4. The correct action that you should take" and "6. Demonstration actions in later timesteps on the same page" SEQUENTIALLY, DO NOT MISS A SINGLE OF THEM, it is especially IMPORTANT that you DO NOT miss "Demonstration actions in later timesteps on the same page", for example if it shows Demonstration action for Previous step 1 and 2, you should put them in a. and b. and then put the action for the current timestep as step c. For each of the action is step a. step b. step c. ..., you should follow the requirement above by clearly writing out the element of interest, its position, the action you should take, correct action format, all of them. Every single action should be put inside ```...```, but NEVER INCLUDE THE SPECIFIC NUMBER ID, for example [123]! The "(but ...)" part can be important, for example when on a list page, you only scroll [down] once if you CANNOT find the desired item, otherwise you don't need to do it, pay attention the reasoning of "The correct action that you should take" to fill in the (but...) part! 

REQUIREMENT 5: Only specify what to do or what not to do, don't explain why. Please strictly adhere to the 'correct action that you should take', do not propose other actions.

REQUIREMENT 6: clearly specify when to issue a stop action when the stop action is either the correct action or in the 'Demonstration actions in later steps on the same page.', do not specify the 'answer' in 'stop [answer]' because answer is different for different tasks, and do not mention anything about stop if this action is neither in "The correct action that you should take." nor "Demonstration actions in later steps on the same page.". 

REQUIREMENT 7: (CRITICAL) The guideline must be written as a SINGLE LINE without any line breaks or paragraph separations.

REQUIREMENT 8: Always provide 1 short sentence describing the general topic of the task in which this guideline applies. Something like "filtering the table", "filling the combobox in the form", "filling the multitab form", "navigating the application", etc. Use one of the previously used topics if possible, or create a new one if nothing fits.
"""

    # Create the user message with specific data
    user_prompt = f"""Here are the information you need:
1. The current observation:
{observation}

2. A Summarization of the current context:
{summarization}

3. The action you generated in the failed run and your reasoning about why you generated this action:
{predicted_action}

4. The correct action that you should take: 
{demo_action}

5. Previous Actions on the same page, together with the corresponding observations:
{prev_actions}

6. Demonstration actions in later steps on the same page, together with the observations:
{later_actions}

CRITICAL: You MUST respond using the structured format with <think> </think>, <topic> </topic>, and <guideline> </guideline> tags. Use the <think> section for thorough analysis and the <guideline> section for the concise, actionable guidance. The response must contain both tags. The guideline must be written as a SINGLE LINE.

Example format:
<think>
I need to analyze the current state and compare the failed action with the correct action. Looking at the observation, I can see that the user is on a login page. The predicted action was to click on a different element, but the correct action is to click on the login button. The mistake was not identifying the correct interactive element for authentication.
</think>

<topic>
authentication and login
</topic>

<guideline>
When on a login or authentication page, if you want to proceed with login (but there are multiple clickable elements), you should click on the primary login button or submit button, typically labeled "Login", "Sign In", or "Submit". The correct action format to do this is ```click [button_id]```.
</guideline>
"""
    discussion = SimpleDiscussion()
    discussion.append(msg_builder.system().add_text(system_prompt))
    discussion.append(msg_builder.user().add_text(user_prompt))

    from agentlab.llm.response_api import APIPayload

    payload = APIPayload(messages=discussion.flatten())
    response = llm(payload)

    # First choice of the raw provider response, if the response exposes one.
    # An empty `choices` list is treated the same as having no raw response.
    choices = getattr(getattr(response, 'raw_response', None), 'choices', None)
    first_choice = choices[0] if choices else None
    raw_content = getattr(getattr(first_choice, 'message', None), 'content', None)

    # When the reply was cut off by the length limit, prefer the raw message
    # content so a partial <guideline> can still be recovered.
    if first_choice is not None and getattr(first_choice, 'finish_reason', None) == 'length':
        logger.warning("⚠️ Response was truncated due to length limit.")
        answer = raw_content if raw_content else str(response)
    else:
        answer = response.think if hasattr(response, "think") else str(response)

    # Guard against a missing/non-text `think` so the tag extraction cannot crash.
    if not isinstance(answer, str):
        answer = "" if answer is None else str(answer)

    print(f"\033[33mReasoning and Guideline\033[0m: \n{answer}")

    # Extract guideline using structured tags
    think, guideline, topic = _extract_think_and_guideline(answer)

    if not guideline:
        logger.warning("⚠️ No <guideline> tags found in response, using fallback")
        # Retry on the raw message content, which may differ from `answer`.
        if raw_content:
            try:
                think, guideline, topic = _extract_think_and_guideline(raw_content)
                if not guideline:
                    guideline = raw_content.strip()
            except (AttributeError, TypeError) as exc:
                # Raw content was not plain text; fall through to `answer`.
                logger.debug(f"Could not parse raw response content: {exc}")

        if not guideline:
            guideline = answer.strip()
        # Untagged text may span several lines; store it as a single line.
        guideline = " ".join(guideline.split())

    if not guideline:
        # Nothing usable came back: do not record an empty guideline.
        logger.warning("⚠️ Empty guideline generated; nothing recorded")
        return guidelines

    # Update guidelines dictionary
    if summarization not in guidelines:
        guidelines[summarization] = defaultdict(lambda: [0, 0])
    guidelines[summarization][guideline][0] += 1

    # Collect topic if provided
    if guideline_topics is not None and topic:
        guideline_topics.append(topic)

    return guidelines


class AutoGuide:
    """
    Agent that processes traces and builds/updates a guideline database (guideline_db.csv) from them.
    Uses autoguide approach: for each step in each trace, predicts action using LLM and compares with actual action.
    If prediction differs from actual action, generates a guideline.
    Works with any domain: Miniwob, WorkArena, or other web automation tasks.
    """

    GUIDELINE_DB_COLUMNS = [
        "time_stamp",
        "task_name",
        "task_seed",
        "base_llm",
        "agent_name",
        "domain_name",
        "user_name",
        "source",
        "semantic_keys",
        "guideline",
    ]

    def __init__(self, model_args: Any, config: JephHinterConfig):
        """Store the configuration and build the models used for guideline mining.

        A deep copy of ``model_args`` gets ``max_new_tokens`` set to 16000 and
        is used to create ``self.llm`` and ``self.msg_builder``. The caller's
        ``model_args`` object itself is stored unmodified. The action-prediction
        agent is then created through ``_init_generic_agent``.
        """
        import copy

        self.model_args = model_args
        self.config = config

        # Work on a copy so the larger token budget does not leak back into
        # the arguments object owned by the caller.
        generation_args = copy.deepcopy(model_args)
        generation_args.max_new_tokens = 16000

        self.llm = generation_args.make_model()
        self.msg_builder = generation_args.get_message_builder()
        self.traces = []

        # Built once and reused for every action prediction.
        self._init_generic_agent()

    def _init_generic_agent(self):
        """Create the GenericAgent used for action prediction.

        The agent is built from ``self.model_args`` with a fixed set of prompt
        flags and stored on ``self.generic_agent``.
        """
        from agentlab.agents.generic_agent.generic_agent import GenericAgent, GenericAgentArgs
        from agentlab.agents.generic_agent.generic_agent_prompt import GenericPromptFlags
        from agentlab.agents.dynamic_prompting import ObsFlags, ActionFlags
        from bgym import HighLevelActionSetArgs

        # Observation options that are switched off for this agent.
        obs_switched_off = dict.fromkeys(
            (
                "use_html",
                "use_screenshot",
                "use_tabs",
                "use_focused_element",
                "use_think_history",
                "use_action_history",
                "use_past_error_logs",
                "filter_visible_elements_only",
                "filter_with_bid_only",
                "filter_som_only",
                "extract_visible_tag",
                "extract_clickable_tag",
            ),
            False,
        )
        # Only the accessibility tree and the error logs are enabled.
        obs_flags = ObsFlags(
            use_ax_tree=True,
            use_error_logs=True,
            extract_coords=None,
            html_type="dom_txt",
            **obs_switched_off,
        )

        # One bid-based action per step, with strict=True.
        action_set_args = HighLevelActionSetArgs(
            subsets="bid",
            multiaction=False,
            strict=True,
            demo_mode=None,
        )
        action_flags = ActionFlags(action_set=action_set_args)

        prompt_options = {
            "use_plan": False,
            "use_criticise": False,
            "use_thinking": False,
            "use_memory": False,
            "use_concrete_example": True,
            "use_abstract_example": False,
            "use_hints": False,
            "enable_chat": False,
            "max_prompt_tokens": None,
            "be_cautious": True,
            "extra_instructions": None,
            "add_missparsed_messages": True,
            "max_trunc_itr": 20,
        }
        flags = GenericPromptFlags(obs=obs_flags, action=action_flags, **prompt_options)

        # Build the agent from the caller-supplied model arguments.
        self.generic_agent = GenericAgentArgs(
            chat_model_args=self.model_args,
            flags=flags,
            max_retry=1,
        ).make_agent()
        logger.info("✅ GenericAgent initialized for action prediction")

    def load_traces(self) -> List[Dict[str, Any]]:
        """Load step pickles from ``config.traces_folder`` and group them into traces.

        Steps are grouped per run directory, so two runs of the same task and
        seed stay separate traces instead of being merged into one. Within a
        trace, steps are ordered by the number in their ``step_<n>`` file name.

        The task name and seed are parsed from the first path component that
        contains ``"miniwob."`` or ``"workarena."``: the text after that prefix
        is split at its first underscore, e.g. ``miniwob.use-colorwheel-2_1``
        gives task ``miniwob.use-colorwheel-2`` and seed ``1``.

        Returns:
            A list of dicts with the keys ``task_name``, ``seed``,
            ``trace_info`` and ``step_count``. The list is also stored on
            ``self.traces``. When no traces folder is configured, an empty list
            is returned and ``self.traces`` is left untouched.
        """
        traces = []
        if not self.config or not self.config.traces_folder:
            return traces

        def _step_index(step):
            # Numeric step index from the file name; unnumbered files go last.
            match = re.search(r"step_(\d+)", os.path.basename(step["file"]))
            return int(match.group(1)) if match else float("inf")

        # Load all step pickle files
        step_data = load_all_step_pickles(self.config.traces_folder)

        # Group steps by run directory (plus the parsed task name and seed).
        experiments = {}
        for step in step_data:
            file_path = step["file"]
            task_name = "unknown_task"
            seed = "unknown_seed"

            for part in file_path.split(os.sep):
                # "miniwob." takes precedence when both prefixes are present.
                prefix = next((p for p in ("miniwob.", "workarena.") if p in part), None)
                if prefix is None:
                    continue
                # Split only at the first occurrence of the prefix.
                task_part = part.split(prefix, 1)[1]
                # The first underscore separates the task name from the seed.
                name, separator, seed_part = task_part.partition("_")
                task_name = prefix + name
                if separator:
                    seed = seed_part
                break

            experiment_key = (os.path.dirname(file_path), task_name, seed)
            if experiment_key not in experiments:
                # Logged once per experiment rather than once per step file.
                logger.info(f"experiment_key {task_name}_{seed}")
                experiments[experiment_key] = {"task_name": task_name, "seed": seed, "steps": []}
            experiments[experiment_key]["steps"].append(step)

        # Convert to trace format
        for experiment_data in experiments.values():
            # sorted() is stable, so files without a step number keep their order.
            ordered_steps = sorted(experiment_data["steps"], key=_step_index)
            trace_info = extract_trace_info(ordered_steps)
            if trace_info:
                traces.append(
                    {
                        "task_name": experiment_data["task_name"],
                        "seed": experiment_data["seed"],
                        "trace_info": trace_info,
                        "step_count": len(ordered_steps),
                    }
                )

        self.traces = traces
        return traces

    def build_guideline_db(self, output_path: Optional[str] = None):
        """
        Builds guideline database using autoguide approach.
        For each step in each trace, predicts action using LLM and compares with actual action.
        If prediction differs from actual action, generates a guideline.
        """
        if not self.traces:
            self.load_traces()
        output_path = output_path or self.config.hint_db_path

        # Load existing DB if exists
        if os.path.exists(output_path):
            db = pd.read_csv(output_path, dtype=str)
        else:
            db = pd.DataFrame({col: [] for col in self.GUIDELINE_DB_COLUMNS})

        # Build a set of existing guidelines to avoid duplicates
        existing = set()
        if not db.empty:
            for i, row in db.iterrows():
                key = (row.get("task_name", ""), row.get("guideline", ""))
                existing.add(key)

        new_rows = []
        guidelines = defaultdict(lambda: defaultdict(lambda: [0, 0]))  # weight, occurrence
        guideline_topics = []
        seen_summarizations = []  # Initialize seen_summarizations

        for trace in self.traces:
            task_name = trace.get("task_name", "unknown_task")
            seed = trace.get("seed", "unknown_seed")
            trace_info = trace.get("trace_info", [])
            
            if not trace_info:
                continue
                
            logger.info(f"Processing trace for task {task_name} with {len(trace_info)} steps")
            
            # Convert trace to trajectory format for autoguide processing
            trajectory = self._convert_trace_to_trajectory(trace)
            
            # Process each step in the trajectory
            for step_idx in range(len(trajectory) // 2):
                # Get the observation and actual action from the successful trace
                obs_data = trajectory[2 * step_idx]
                actual_action = trajectory[2 * step_idx + 1]
                
                observation = obs_data['observation']['text']
                url = obs_data['info']['page'].url
                
                # Get state summarization
                curr_sum, seen_summarizations = get_status(observation, url, logger, method='self', llm=self.llm, msg_builder=self.msg_builder, seen_summarizations=seen_summarizations)
                
                # Predict action using LLM (simulate agent's prediction)
                predicted_action = self._predict_action_for_step_alternative(
                    trajectory[:2 * step_idx + 1],  # History up to current step
                    task_name
                )
                
                # Compare predicted action with actual action
                if not identical_action(predicted_action, actual_action):
                    logger.info(f"Action mismatch at step {step_idx} for task {task_name}")
                    logger.info(f"Predicted: {predicted_action}")
                    logger.info(f"Actual: {actual_action}")
                    
                    # # Generate guideline
                    later_actions = action_same_state(trajectory, step_idx, step_idx+2, curr_sum, logger, llm=self.llm, msg_builder=self.msg_builder, seen_summarizations=seen_summarizations)
                    prev_actions = action_same_state(trajectory, step_idx-1, step_idx, curr_sum, logger, llm=self.llm, msg_builder=self.msg_builder, seen_summarizations=seen_summarizations)
                    
                    guidelines = gen_guideline(
                        observation,
                        curr_sum,
                        str(predicted_action),
                        str(actual_action),
                        guidelines,
                        later_actions,
                        prev_actions,
                        self.llm,
                        self.msg_builder,
                        guideline_topics
                    )
                    
                    # Add guideline to database
                    for summarization, guideline_dict in guidelines.items():
                        for guideline_text, (weight, occurrence) in guideline_dict.items():
                            key = (task_name, guideline_text)
                            if key not in existing:
                                # Get topic for this guideline (use summarization as fallback)
                                topic = guideline_topics[-1] if guideline_topics else summarization
                                row = {
                                    "time_stamp": datetime.now().strftime("%b %d"),
                                    "task_name": task_name,
                                    "task_seed": seed,
                                    "base_llm": getattr(self.model_args, "model_name", "unknown_llm"),
                                    "agent_name": self.config.agent_name,
                                    "domain_name": self.config.domain_name,
                                    "user_name": self.config.user_name,
                                    "source": self.config.source,
                                    "semantic_keys": topic,
                                    "guideline": guideline_text,
                                }
                                new_rows.append(row)
                                existing.add(key)
                else:
                    logger.info(f"✅ Actions match at step {step_idx} for task {task_name}")

        # Save database
        if new_rows:
            db = pd.concat([db, pd.DataFrame(new_rows)], ignore_index=True)
            db = db[self.GUIDELINE_DB_COLUMNS]  # ensure column order
            logger.info(f"Saving guideline database to {output_path}")
            db.to_csv(output_path, index=False, quoting=1)  # QUOTE_ALL
            logger.info(f"Guideline database updated with {len(new_rows)} new rows (total {len(db)}).")
        else:
            logger.info(f"No new guidelines to add. Database at {output_path} is up to date.")

    def _convert_trace_to_trajectory(self, trace):
        """Convert trace info to trajectory format for guideline generation."""
        trajectory = []
        for step in trace["trace_info"]:
            # Create observation
            obs = {
                "text": step.get("axtree_txt", ""),
                "goal": step.get("goal", []),
                "last_action_error": step.get("error", "")
            }
            
            # Create info with generic URL
            info = {
                "page": type('Page', (), {'url': 'http://example.com'})()  # Placeholder
            }
            
            # Add observation
            trajectory.append({"observation": obs, "info": info})
            
            # Add action
            trajectory.append(step.get("action", "noop()"))
        
        return trajectory

    # def _predict_action_for_step(self, trajectory_history, task_name):
    #     """
    #     Predict action for the current step using the pre-initialized GenericAgent.
    #     """
    #     # Build observation history and actions correctly
    #     obs_history = []
    #     actions = []
        
    #     # Process trajectory history in pairs (obs, action)
    #     for i in range(0, len(trajectory_history), 2):
    #         obs_data = trajectory_history[i]
    #         goal = obs_data['observation'].get('goal', [])
    #         goal_text = " ".join(goal) if goal else "Complete the task"
            
    #         # Create a complete observation that matches what GenericAgent expects
    #         obs_history.append({
    #             "axtree_txt": obs_data['observation']['text'],
    #             "dom_txt": obs_data['observation']['text'],
    #             "goal": goal,
    #             "last_action_error": obs_data['observation'].get('last_action_error', ''),
    #             "chat_messages": [],
    #             "goal_object": [
    #                 {
    #                     "type": "text",
    #                     "text": goal_text
    #                 }
    #             ],
    #             "screenshot": None,
    #             "screenshot_som": None,
    #             "focused_element_bid": None,
    #             "extra_element_properties": {},
    #             "dom_object": None,
    #             "axtree_object": None
    #         })
            
    #         # Only add action if there's a corresponding action in the trajectory
    #         if i + 1 < len(trajectory_history):
    #             actions.append(trajectory_history[i + 1])
        
    #     # Ensure we have the correct relationship: len(obs_history) == len(actions) + 1
    #     if len(obs_history) != len(actions) + 1:
    #         if len(obs_history) > len(actions) + 1:
    #             obs_history = obs_history[:len(actions) + 1]
    #         elif len(obs_history) < len(actions) + 1:
    #             actions = actions[:len(obs_history) - 1]
        
    #     # Set up the agent's internal state BEFORE calling get_action
    #     self.generic_agent.obs_history = obs_history[:-1]  # All observations except the current one
    #     self.generic_agent.actions = actions
    #     self.generic_agent.memories = [None] * len(actions)
    #     self.generic_agent.thoughts = [None] * len(actions)
    #     self.generic_agent.plan = "No plan yet"
    #     self.generic_agent.plan_step = -1
        
    #     # Get the current observation (last one in history)
    #     current_obs = obs_history[-1]
        
    #     # Call the agent's get_action method
    #     predicted_action, agent_info = self.generic_agent.get_action(current_obs)
        
    #     # Extract the action from the response
    #     if hasattr(predicted_action, 'action_type'):
    #         # If it's a structured action, convert to string format
    #         if predicted_action.action_type == 'click':
    #             return f"click [{predicted_action.element_id}]"
    #         elif predicted_action.action_type == 'type':
    #             return f"type [{predicted_action.element_id}] [{predicted_action.text}]"
    #         elif predicted_action.action_type == 'press':
    #             return f"press [{predicted_action.key_combination}]"
    #         elif predicted_action.action_type == 'scroll':
    #             return f"scroll [{predicted_action.direction}]"
    #         elif predicted_action.action_type == 'goto':
    #             return f"goto [{predicted_action.url}]"
    #         elif predicted_action.action_type == 'go_back':
    #             return "go_back"
    #         elif predicted_action.action_type == 'stop':
    #             return f"stop [{predicted_action.answer}]"
    #         elif predicted_action.action_type == 'record':
    #             return f"record [{predicted_action.information}]"
    #         else:
    #             return str(predicted_action)
    #     else:
    #         # If it's already a string, return as is
    #         return str(predicted_action)

    def _extract_action_from_response(self, response_text):
        """
        Extract action from LLM response that contains <think> and <action> tags.
        Returns only the action part, not the thinking part.
        Also converts the action format to match the expected pattern.
        """
        import re
        
        # Look for <action> tags
        action_pattern = r'<action>(.*?)</action>'
        action_match = re.search(action_pattern, response_text, re.DOTALL)
        
        if action_match:
            action = action_match.group(1).strip()
            # Convert action format from [id] to ('aid') pattern
            action = self._convert_action_format(action)
            return action
        else:
            # Fallback: if no <action> tags found, return the whole response
            logger.warning("⚠️ No <action> tags found in response, using fallback")
            return response_text.strip()

    def _convert_action_format(self, action):
        """
        Convert action format from LLM output to expected format.
        Example: click [524] -> click('a524')
        """
        import re
        
        # Pattern to match actions like: click [524], type [123] [content], etc.
        pattern = r'(\w+)\s*\[([^\]]+)\](?:\s*\[([^\]]+)\])?'
        match = re.match(pattern, action.strip())
        
        if match:
            action_type = match.group(1)
            first_param = match.group(2)
            second_param = match.group(3) if match.group(3) else None
            
            if action_type == 'click':
                # Convert click [524] to click('a524')
                if first_param.startswith('a'):
                    return f"click('{first_param}')" 
                else:
                    return f"click('a{first_param}')"
            elif action_type == 'type':
                # Convert type [123] [content] to type('a123', 'content')
                if second_param:
                    return f"type('{first_param}', '{second_param}')"
                else:
                    return f"type('{first_param}')"
            elif action_type == 'press':
                # Convert press [key] to press('key')
                return f"press('{first_param}')"
            elif action_type == 'scroll':
                # Convert scroll [direction] to scroll('direction')
                return f"scroll('{first_param}')"
            elif action_type == 'goto':
                # Convert goto [url] to goto('url')
                return f"goto('{first_param}')"
            elif action_type == 'stop':
                # Convert stop [answer] to stop('answer')
                return f"stop('{first_param}')"
            elif action_type == 'record':
                # Convert record [info] to record('info')
                return f"record('{first_param}')"
            else:
                # For other action types, return as is
                return action
        else:
            # If no pattern matches, return the original action
            return action

    def _predict_action_for_step_alternative(self, trajectory_history, task_name):
        """
        Alternative approach: Predict action for the current step using the same LLM call pattern as JephHinter.
        This avoids GenericAgent compatibility issues by using direct LLM calls.
        """
        # Build observation history and actions correctly
        obs_history = []
        actions = []
        
        # Process trajectory history in pairs (obs, action)
        for i in range(0, len(trajectory_history), 2):
            obs_data = trajectory_history[i]
            goal = obs_data['observation'].get('goal', [])
            goal_text = " ".join(goal) if goal else "Complete the task"
            
            obs_history.append({
                "axtree_txt": obs_data['observation']['text'],
                "goal": goal,
                "last_action_error": obs_data['observation'].get('last_action_error', ''),
            })
            
            # Only add action if there's a corresponding action in the trajectory
            if i + 1 < len(trajectory_history):
                actions.append(trajectory_history[i + 1])
        
        # Build the prompt following GenericAgent's pattern
        current_obs = obs_history[-1]
        goal_text = "".join(current_obs.get('goal', [])) if current_obs.get('goal') else "Complete the task"
        
        # Build history context
        history_context = ""
        for i, (obs, action) in enumerate(zip(obs_history[:-1], actions)):
            history_context += f"Step {i+1}:\n"
            history_context += f"Observation: {obs['axtree_txt'][:500]}...\n"
            history_context += f"Action: {action}\n\n"
        
        # Create the action prediction prompt following GenericAgent's structure
        action_prompt = f"""# Instructions
Review the current state of the page and all other information to find the best
possible next action to accomplish your goal. Your answer will be interpreted
and executed by a program, make sure to follow the formatting instructions.

## Goal:
{goal_text}

## History of interaction with the task:
{history_context}

# Observation of current step:
## Accessibility Tree:
{current_obs['axtree_txt']}

# Action space:
Note: This action set allows you to interact with your environment. Most of them
are python function executing playwright code. The primary way of referring to
elements in the page is through bid which are specified in your observations.


12 different types of actions are available.

noop(wait_ms: float = 1000)
scroll(delta_x: float, delta_y: float)
fill(bid: str, value: str)
select_option(bid: str, options: str | list[str])
click(bid: str, button: Literal['left', 'middle', 'right'] = 'left', modifiers: list[typing.Literal['Alt', 'Control', 'ControlOrMeta', 'Meta', 'Shift']] = [])
dblclick(bid: str, button: Literal['left', 'middle', 'right'] = 'left', modifiers: list[typing.Literal['Alt', 'Control', 'ControlOrMeta', 'Meta', 'Shift']] = [])
hover(bid: str)
press(bid: str, key_comb: str)
focus(bid: str)
clear(bid: str)
drag_and_drop(from_bid: str, to_bid: str)
send_msg_to_user(text: str)
Only a single action can be provided at once. Example:
fill('a12', 'example with "quotes"')

# Abstract Example

Here is an abstract version of the answer with description of the content of
each tag. Make sure you follow this structure, but replace the content with your
answer:

<think>
Think step by step. If you need to make calculations such as coordinates, write them here. Describe the effect
that your previous action had on the current content of the page.
</think>

<action>
One single action to be executed. You can only use one action at a time.
</action>


# Concrete Example

Here is a concrete example of how to format your answer.
Make sure to follow the template with proper tags:

<think>
From previous action I tried to set the value of year to "2022",
using select_option, but it doesn't appear to be in the form. It may be a
dynamic dropdown, I will try using click with the bid "a324" and look at the
response from the page.
</think>

<action>
click('a324')
</action>
"""

        # Use the same LLM call pattern as JephHinter (which works)
        discussion = SimpleDiscussion()
        sys_msg = self.msg_builder.system().add_text(
            "You are an autonomous agent that predicts the next action in a web automation task. You MUST respond using the structured format with <think> and <action> tags. Use the <think> section for analysis and the <action> section for the action format."
        )
        discussion.append(sys_msg)
        trace_msg = self.msg_builder.user().add_text(action_prompt)
        discussion.append(trace_msg)
        
        from agentlab.llm.response_api import APIPayload
        payload = APIPayload(messages=discussion.flatten())
        response = self.llm(payload)
        
        # Extract action from structured response format
        response_text = response.think if hasattr(response, "think") else str(response)
        action = self._extract_action_from_response(response_text)
        
        # Log action extraction
        logger.debug(f"✅ Successfully extracted action: {action}")
        
        return action


    def get_action(self, obs):
        """Return a fixed ``"noop()"`` action with placeholder agent info.

        ``obs`` is accepted but not inspected.
        """
        placeholder_info = bgym.AgentInfo(think="nope", chat_messages=[], stats={})
        return "noop()", placeholder_info


class GuidelinesMiner:
    """
    Mine guidelines from execution traces by driving an AutoGuide agent.
    """

    def __init__(self, config: HintsMinerConfig):
        """Store ``config`` and point its hinter config at the miner's paths."""
        self.config = config
        hinter_config = self.config.hinter_config
        # The hinter reads traces from the miner's root directory and writes
        # to the miner's output file.
        hinter_config.traces_folder = config.root_dir
        hinter_config.hint_db_path = config.output_path

    def run(self):
        """Build (or extend) the guideline database at ``config.output_path``."""
        cfg = self.config
        logger.info(f"Starting guideline mining from: {cfg.root_dir}")
        logger.info(f"Output will be saved to: {cfg.output_path}")
        miner_agent = AutoGuide(model_args=cfg.model_args, config=cfg.hinter_config)
        miner_agent.build_guideline_db(output_path=cfg.output_path)
        logger.info("Guideline database update complete.")


# Example usage
if __name__ == "__main__":
    import argparse

    # CLI entry point: mine guidelines from a directory of step_*.pkl.gz traces
    # and write them to a guideline CSV database.
    parser = argparse.ArgumentParser(description="Generate guidelines from execution traces")
    parser.add_argument(
        "--traces-dir",
        "-t",
        default="/home/toolkit/ui-copilot-aug16/subset_successful_trace",
        help="Directory containing execution traces (step_*.pkl.gz files)",
    )
    parser.add_argument(
        "--output",
        "-o",
        default="guideline_db_updated_test.csv",
        # %(default)s keeps the help text in sync with the actual default.
        help="Output path for guideline database (default: %(default)s)",
    )
    parser.add_argument(
        "--model",
        "-m",
        default="claude-4-sonnet-20250514",
        help=(
            "LLM model to use for guideline generation (default: %(default)s). "
            "NOTE: currently ignored; the GPT_5_nano preset is always used"
        ),
    )

    args = parser.parse_args()

    # Always generate guidelines (either from default or specified directory)
    from agentlab.llm.base_api import BaseModelArgs
    from agentlab.agents.tool_use_agent import CLAUDE_SONNET_37, GPT_5_nano

    # Use the existing model configuration
    model_args = GPT_5_nano

    # --model is parsed but not wired to a model configuration; say so rather
    # than silently running a different model than the one requested.
    if args.model != parser.get_default("model"):
        print(f"⚠️ --model {args.model} is ignored; using {model_args.model_name}")

    # Create config
    config = HintsMinerConfig(
        root_dir=args.traces_dir, output_path=args.output, model_args=model_args
    )

    print(f"Generating guidelines from: {args.traces_dir}")
    print(f"Output will be saved to: {args.output}")
    print(f"Using model: {model_args.model_name}")

    miner = GuidelinesMiner(config)
    miner.run()

    print(f"✅ Guidelines generated successfully! Check {args.output}")
