import json
import logging
import os
from datetime import datetime
from typing import Any, Dict, List, Optional

import bgym
import pandas as pd
from agentlab.llm.response_api import APIPayload

from jephhinter.chat_saver import create_chat_data, save_chat_messages_to_pkl
from jephhinter.configs import HintsMinerConfig, JephHinterConfig, AutoGuideConfig_workarena_l1, AutoGuideConfig_miniwob
from jephhinter.utils.utils import (
    SYSTEM_PROMPT,
    SimpleDiscussion,
    _select_traces_for_hint,
    add_chain_of_thought_tokens,
    construct_hint_prompt,
    construct_hint_prompt_step_wise,
    construct_hint_prompt_step_zoom,
    construct_hint_prompt_step_zoom_dual_trace,
    estimate_token_count,
    extract_structured_response,
    extract_trace_info,
    load_all_step_pickles,
    smart_truncate_text,
    summarize_trace_for_important_steps,
    construct_two_trace_comparison_prompt,
    context_identification,
)

# Module-level logger named after this module; its threshold is set to INFO at import time.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


class JephHinter:
    """
    Agent that processes traces and builds/updates a hint database (hint_db.csv) from them.
    """

    HINT_DB_COLUMNS = [
        "time_stamp",
        "task_name",
        "task_seed",
        "reward",
        "base_llm",
        "agent_name",
        "domain_name",
        "user_name",
        "source",
        "semantic_keys",
        "hint",
    ]

    def __init__(self, model_args: Any, config: JephHinterConfig):
        self.model_args = model_args
        self.config = config
        self.llm = model_args.make_model()
        self.msg_builder = model_args.get_message_builder()
        self.traces = []
        self.chat_output_dir = getattr(config, "chat_output_dir", "chat_messages")

        # Log chat saving status
        chat_saving_enabled = getattr(config, "save_chat_messages", True)
        if chat_saving_enabled:
            logger.info(
                f"✅ Chat message saving enabled during hint generation. Messages will be saved to: {self.chat_output_dir}"
            )
        else:
            logger.info(
                "⚠️ Chat message saving disabled during hint generation. No new chat messages will be saved."
            )

    def load_traces(self) -> List[Dict[str, Any]]:
        """Load traces from pickle files in the traces folder."""
        traces = []
        if not self.config or not self.config.traces_folder:
            return traces

        # Load all step pickle files
        step_data = load_all_step_pickles(self.config.traces_folder)
        # Group steps by experiment/task and seed
        experiments = {}
        for step in step_data:
            # Extract experiment path from file path
            file_path = step["file"]
            # Extract task name and seed from path (assuming structure like .../task_name_seed/step_*.pkl.gz)
            path_parts = file_path.split(os.sep)
            task_name = "unknown_task"
            seed = "unknown_seed"

            for part in path_parts:
                # TODO: Make this generic
                if "miniwob." in part or "workarena." in part or "webarenalite." in part:
                    # Handle new format: "miniwob.use-colorwheel-2_1" or "workarena.servicenow.sort-user-list_85"
                    if "miniwob." in part:
                        prefix = "miniwob."
                    elif "webarenalite." in part:
                        prefix = "webarenalite."
                    else:  # workarena.
                        prefix = "workarena."

                    # Extract everything after the prefix and split by last underscore
                    task_part = part.split(prefix)[1]
                    # Find the first underscore to separate task name from seed
                    first_underscore_idx = task_part.find("_")
                    if first_underscore_idx != -1:
                        task_name = prefix + task_part[:first_underscore_idx]
                        seed = task_part[first_underscore_idx + 1 :]
                    else:
                        task_name = prefix + task_part
                        seed = "unknown_seed"
                    break

            # Create unique key for task+seed combination
            experiment_key = f"{task_name}_{seed}"
            logger.info(f"experiment_key {experiment_key}")
            if experiment_key not in experiments:
                experiments[experiment_key] = {"task_name": task_name, "seed": seed, "steps": []}
            experiments[experiment_key]["steps"].append(step)

        # Convert to trace format
        for experiment_key, experiment_data in experiments.items():
            trace_info = extract_trace_info(experiment_data["steps"])
            if trace_info:
                traces.append(
                    {
                        "task_name": experiment_data["task_name"],
                        "seed": experiment_data["seed"],
                        "trace_info": trace_info,
                        "step_count": len(experiment_data["steps"]),
                    }
                )

        self.traces = traces
        return traces

    def _save_chat_messages(
        self,
        task_name: str,
        seed: str,
        trace_info: List[Dict[str, Any]],
        system_prompt: str,
        user_prompt: str,
        llm_response: Any,
        metadata: Dict[str, Any] = None,
        extracted_components: Dict[str, str] = None,
    ):
        """Save chat messages using chat_saver module."""
        if not getattr(self.config, "save_chat_messages", True):
            return

        try:
            # Prepare metadata
            base_metadata = {
                "hint_prompt_config": {
                    "exclude_axtree": self.config.hint_prompt_config.exclude_axtree,
                    "exclude_actions": self.config.hint_prompt_config.exclude_actions,
                    "exclude_think": self.config.hint_prompt_config.exclude_think,
                    "exclude_reward": self.config.hint_prompt_config.exclude_reward,
                    "n_traces_to_hinter": self.config.hint_prompt_config.n_traces_to_hinter,
                }
            }
            if metadata:
                base_metadata.update(metadata)

            # Create and save chat data
            chat_data = create_chat_data(
                task_name=task_name,
                seed=seed,
                trace_info=trace_info,
                system_prompt=system_prompt,
                user_prompt=user_prompt,
                llm_response=llm_response,
                model_name=getattr(self.model_args, "model_name", "unknown_llm"),
                agent_name=self.config.agent_name,
                domain_name=self.config.domain_name,
                user_name=self.config.user_name,
                source=self.config.source,
                metadata=base_metadata,
                extracted_components=extracted_components,
            )

            # Determine output directory
            hint_db_dir = os.path.dirname(self.config.hint_db_path)
            output_dir = (
                os.path.join(hint_db_dir, self.chat_output_dir)
                if hint_db_dir
                else os.path.join(self.config.traces_folder, self.chat_output_dir)
            )

            save_chat_messages_to_pkl(chat_data, output_dir, task_name, seed)

        except Exception as e:
            logger.error(f"Failed to save chat messages for task {task_name}: {e}")

    def build_hint_db(self, output_path: Optional[str] = None):
        """
        Generate hints from the loaded traces and merge them into the hint database CSV.

        Traces are loaded on demand and grouped by task name. For each task, up to
        ``n_hints_per_task`` rounds are run (default 1). Each round selects traces with
        ``_select_traces_for_hint`` and uses the first applicable strategy:

        1. Step-wise hinting: ``step_wise_hinting`` is set, exactly one trace was selected
           and it does not consist of a single step. One hint is generated per sliding
           window of ``n_steps_for_hinting`` consecutive steps.
        2. Step zoom: ``use_step_zoom`` is set and at most two traces were selected.
        3. Autoguide two-trace comparison: ``config.source == "autoguide"``.
        4. Full-trace prompt otherwise.

        A single-step trace under step-wise hinting falls through to strategies 2-4.
        Hints whose ``(task_name, hint)`` pair is already in the database are not added
        again. Rows produced by strategies 2-4 are written to disk after every new hint;
        step-wise rows are written with the next save.

        Args:
            output_path: CSV file to read the existing database from and to write to.
                Defaults to ``self.config.hint_db_path`` when omitted or empty.
        """
        if not self.traces:
            self.load_traces()
        output_path = output_path or self.config.hint_db_path

        # Load existing DB if exists
        if os.path.exists(output_path):
            db = pd.read_csv(output_path, dtype=str)
        else:
            db = pd.DataFrame({col: [] for col in self.HINT_DB_COLUMNS})

        # (task_name, hint) pairs already stored, used to avoid duplicates
        existing = set()
        if not db.empty:
            for _, row in db.iterrows():
                existing.add((row.get("task_name", ""), row.get("hint", "")))

        # Group traces by task name
        tasks_traces = {}
        for trace in self.traces:
            tasks_traces.setdefault(trace.get("task_name", "unknown_task"), []).append(trace)

        new_rows = []
        hint_topics = []
        cost_so_far = self.llm.stats.stats_dict.get("effective_cost", 0)
        for task_name, task_traces in tasks_traces.items():
            logger.info(f"Cost so far: {cost_so_far}")
            if not task_traces:
                continue
            prompt_config = self.config.hint_prompt_config
            n_traces_to_hinter = prompt_config.n_traces_to_hinter
            n_hints_per_task = getattr(prompt_config, "n_hints_per_task", 1)
            hints_for_this_task = set()

            for _ in range(n_hints_per_task):
                self.llm.reset_stats()  # Reset stats to track cost per hint
                # Autoguide context belongs to the traces of this round only; resetting it
                # here keeps a context from an earlier round from relabelling a later hint.
                context = None
                hint_topics_str = (
                    ("- " + "\n- ".join(list(set(hint_topics))))
                    if hint_topics
                    else "No topics yet."
                )
                selected_trace_infos = _select_traces_for_hint(task_traces, n_traces_to_hinter)
                if not selected_trace_infos:
                    continue

                # Step-wise hinting needs exactly one selected trace. A single-step trace
                # cannot be windowed, so it is routed to the remaining strategies instead
                # of leaving this round without a prompt.
                use_step_wise = (
                    prompt_config.step_wise_hinting and len(selected_trace_infos) == 1
                )
                if use_step_wise and len(selected_trace_infos[0]) == 1:
                    logger.warning(
                        "⚠️ Trace has only 1 step, which is insufficient for step-wise hinting. Skipping step-wise hinting and falling back to other methods."
                    )
                    use_step_wise = False

                # Check which hinting approach to use
                if use_step_wise:
                    print(f"🎯 SCENARIO: Step-wise hinting (single trace) for task: {task_name}")

                    # Generate n-k+1 hints for n steps when using k consecutive steps
                    trace_info = selected_trace_infos[0]
                    n_steps = len(trace_info)
                    requested_steps = prompt_config.n_steps_for_hinting
                    n_steps_for_hinting = requested_steps

                    # Adaptive step count: the window cannot exceed the available steps
                    if n_steps_for_hinting > n_steps:
                        logger.warning(
                            f"⚠️ Requested {n_steps_for_hinting} steps for hinting, but trace only has {n_steps} steps. Adjusting to use {n_steps} steps."
                        )
                        n_steps_for_hinting = n_steps

                    n_hints_to_generate = n_steps - n_steps_for_hinting + 1

                    # Log the final configuration after any adjustments
                    if n_steps_for_hinting != requested_steps:
                        logger.info(
                            f"📝 Final step-wise hinting configuration: {n_steps_for_hinting} consecutive steps (adjusted from requested {requested_steps})"
                        )
                    else:
                        logger.info(
                            f"📝 Step-wise hinting configuration: {n_steps_for_hinting} consecutive steps (as requested)"
                        )

                    logger.info(
                        f"🎯 Using step-wise hinting - Generating {n_hints_to_generate} hints for {n_steps} steps using {n_steps_for_hinting} consecutive steps"
                    )
                    logger.info(
                        f"   This will create hints for step sequences: 1-{n_steps_for_hinting}, 2-{n_steps_for_hinting + 1}, ..., {n_hints_to_generate}-{n_steps}"
                    )

                    # Generate one hint per sliding window of steps
                    step_wise_hints = []
                    for window_idx in range(n_hints_to_generate):
                        start_step_idx = window_idx
                        end_step_idx = window_idx + n_steps_for_hinting - 1
                        step_range = f"{start_step_idx + 1}-{end_step_idx + 1}"
                        logger.info(
                            f"   Generating hint for step sequence {step_range} ({window_idx + 1}/{n_hints_to_generate})"
                        )

                        # Reward accumulated by the steps inside this window only
                        window_steps = trace_info[start_step_idx : end_step_idx + 1]
                        reward = sum(step.get("reward", 0) for step in window_steps)

                        # Construct prompt for this step sequence
                        step_hint_prompt = construct_hint_prompt_step_wise(
                            trace_info,
                            start_step_idx,
                            n_steps_for_hinting,
                            task_name,
                            prompt_config,
                            hint_topics_str,
                        )
                        if not step_hint_prompt:
                            continue

                        # Generate hint for this step sequence
                        discussion = SimpleDiscussion()
                        sys_msg = self.msg_builder.system().add_text(
                            "You are a hint generation expert. You MUST respond using the structured format with <think> and <hint> tags. Use the <think> section for thorough analysis (200-800 words) and the <hint> section for concise, actionable guidance (under 256 tokens)."
                        )
                        discussion.append(sys_msg)
                        trace_msg = self.msg_builder.user().add_text(step_hint_prompt)
                        discussion.append(trace_msg)

                        payload = APIPayload(messages=discussion.flatten())
                        response = self.llm(payload)
                        cost_so_far = self.llm.stats.stats_dict.get("effective_cost", 0)
                        logger.info(f"Cost so far: {cost_so_far}")

                        # Extract hint from structured response format
                        response_text = (
                            response.think if hasattr(response, "think") else str(response)
                        )
                        think, hint, topic = extract_structured_response(response_text)
                        if topic:
                            hint_topics.append(topic)

                        # Log structured response parsing
                        if think and hint:
                            logger.info(
                                f"✅ Successfully parsed structured response for step-wise hint {step_range}"
                            )
                            logger.info(
                                f"   Thinking: {len(think)} chars, Hint: {len(hint)} chars"
                            )

                            if len(hint) > 3000:
                                logger.warning(
                                    f"⚠️ Step-wise hint for {step_range} is very long ({len(hint)} chars) - consider making it more concise"
                                )
                        elif not hint:
                            logger.warning(
                                f"⚠️ No <hint> tags found in step-wise response for {step_range}, using fallback"
                            )
                            # Fallback: whitespace-normalized raw response
                            hint = " ".join(response_text.strip().split())

                        # Save chat messages for this step-wise hint, recording the
                        # system prompt that was actually sent.
                        self._save_chat_messages(
                            task_name=task_name,
                            seed=task_traces[0].get("seed", "unknown_seed"),
                            trace_info=[window_steps],  # Only the steps in this sequence
                            system_prompt=str(sys_msg),
                            user_prompt=step_hint_prompt,
                            llm_response=response,
                            metadata={
                                "step_sequence": step_range,
                                "n_steps_in_sequence": n_steps_for_hinting,
                                "original_requested_steps": requested_steps,
                                "hint_type": "step_wise",
                            },
                            extracted_components={
                                "thinking": think,
                                "hint": hint,
                                "topic": topic,
                            },
                        )

                        step_wise_hints.append(
                            {
                                "step_sequence": step_range,
                                "hint": hint,
                                "reward": reward,
                                "topic": topic,
                            }
                        )

                        # Debug logging for step-wise hints; the parser may return no
                        # thinking section, so guard before measuring/slicing it.
                        think_text = think or ""
                        logger.info(f"💾 Step-wise hint saved for {step_range}:")
                        logger.info(f"💾 Hint length: {len(hint)}")
                        logger.info(f"💾 Hint preview: {hint[:200]}...")
                        logger.info(f"💾 Think length: {len(think_text)}")
                        logger.info(f"💾 Think preview: {think_text[:200]}...")

                    # Add all step-wise hints to the database
                    for step_hint in step_wise_hints:
                        key = (task_name, step_hint["hint"])
                        if key in existing:
                            continue
                        new_rows.append(
                            self._new_hint_row(
                                task_name=task_name,
                                task_seed=task_traces[0].get("seed", "unknown_seed"),
                                reward=step_hint["reward"],
                                # "topic" is always present but may be empty, so fall back
                                # with `or` rather than a dict default.
                                semantic_keys=step_hint.get("topic")
                                or f"step_wise_{step_hint['step_sequence']}_sequence_{n_steps_for_hinting}_steps",
                                hint=step_hint["hint"],
                            )
                        )
                        existing.add(key)

                    logger.info(
                        f"✅ Generated {len(step_wise_hints)} step-wise hints for task {task_name}"
                    )
                    # Skip the regular hint generation since step-wise hints were produced
                    continue

                elif prompt_config.use_step_zoom and len(selected_trace_infos) <= 2:
                    if len(selected_trace_infos) == 1:
                        print(f"🎯 SCENARIO: Step zoom (single trace) for task: {task_name}")
                        # Single trace step zoom
                        trace_info = selected_trace_infos[0]
                        important_indices = summarize_trace_for_important_steps(
                            trace_info, self.llm, self.msg_builder, prompt_config
                        )
                        logger.info(
                            f"Using step zoom (single trace) - Important indices: {important_indices}"
                        )
                        hint_prompt = construct_hint_prompt_step_zoom(
                            trace_info,
                            important_indices,
                            task_name,
                            prompt_config,
                            hint_topics_str,
                        )
                    else:
                        print(f"🎯 SCENARIO: Step zoom (dual trace) for task: {task_name}")
                        # Dual trace step zoom - analyze both traces and find important steps
                        logger.info(
                            "Using step zoom (dual trace) - Analyzing both traces for important steps"
                        )

                        # (trace index, step index) pairs flagged as important
                        all_important_indices = []
                        # Steps of both traces, each tagged with its trace and step index
                        combined_trace_info = []

                        for trace_idx, trace_info in enumerate(selected_trace_infos):
                            important_indices = summarize_trace_for_important_steps(
                                trace_info,
                                self.llm,
                                self.msg_builder,
                                prompt_config,
                            )
                            logger.info(
                                f"Trace {trace_idx + 1} important indices: {important_indices}"
                            )

                            for idx in important_indices:
                                all_important_indices.append((trace_idx, idx))

                            for step_idx, step in enumerate(trace_info):
                                step_with_trace = step.copy()
                                step_with_trace["trace_id"] = trace_idx
                                step_with_trace["step_id"] = step_idx
                                combined_trace_info.append(step_with_trace)

                        # Use the combined trace info and important indices for step zoom
                        hint_prompt = construct_hint_prompt_step_zoom_dual_trace(
                            combined_trace_info,
                            all_important_indices,
                            task_name,
                            prompt_config,
                            hint_topics_str,
                        )
                elif self.config.source == "autoguide":
                    # TODO: Remove Autoguide specific code from JephHinter
                    assert len(selected_trace_infos) == 2, "Autoguide requires two traces"
                    print(f"🎯 SCENARIO: Autoguide two-trace comparison for task: {task_name}")

                    # Use the two-trace comparison prompt with retry logic
                    hint_prompt = None
                    n_repeats = 0
                    max_n_repeats = 5  # Maximum number of attempts to find valid trace pairs

                    while n_repeats < max_n_repeats:
                        if n_repeats > 0:
                            # Re-select traces for subsequent attempts
                            selected_trace_infos = _select_traces_for_hint(
                                task_traces, n_traces_to_hinter
                            )
                            # The comparison indexes two traces; stop when a re-selection
                            # yields fewer instead of failing with an IndexError.
                            if not selected_trace_infos or len(selected_trace_infos) < 2:
                                break

                        # Cumulative reward per trace decides which one is the desired one
                        trace_rewards = [
                            sum(step.get("reward", 0) for step in candidate)
                            for candidate in selected_trace_infos
                        ]

                        if trace_rewards[0] == trace_rewards[1]:
                            logger.warning(
                                "The two traces have the same reward, so they cannot be used for comparison"
                            )
                        else:
                            if trace_rewards[0] > trace_rewards[1]:
                                desired_trace = selected_trace_infos[0]
                                undesired_trace = selected_trace_infos[1]
                            else:
                                desired_trace = selected_trace_infos[1]
                                undesired_trace = selected_trace_infos[0]

                            context = context_identification(
                                selected_trace_infos, self.llm, self.msg_builder
                            )
                            logger.info(f"Context: {context}")

                            hint_prompt = construct_two_trace_comparison_prompt(
                                [desired_trace, undesired_trace],
                                task_name,
                                context,
                                prompt_config,
                                hint_topics_str,
                            )
                            break

                        n_repeats += 1

                        if not hint_prompt and n_repeats < max_n_repeats:
                            logger.warning(
                                f"No hint prompt generated for two-trace comparison (attempt {n_repeats}/{max_n_repeats}), retrying with different traces"
                            )

                    if not hint_prompt:
                        logger.warning(
                            f"Failed to generate hint prompt after {max_n_repeats} attempts for two-trace comparison"
                        )
                        continue

                else:
                    print(f"🎯 SCENARIO: Full trace hint generation for task: {task_name}")
                    # Use regular prompt construction with automatic truncation
                    hint_prompt = construct_hint_prompt(
                        selected_trace_infos,
                        task_name,
                        hint_prompt_config=prompt_config,
                        hint_topics=hint_topics_str,
                    )

                # Add chain of thought tokens to encourage deeper thinking
                hint_prompt_parts = hint_prompt.split("\n")
                hint_prompt_parts = add_chain_of_thought_tokens(hint_prompt_parts)
                hint_prompt = "\n".join(hint_prompt_parts)

                # Final safety check: ensure prompt is within token limits
                hint_prompt = self._fit_prompt_to_token_budget(hint_prompt, task_name)

                # Total reward over every step of every selected trace
                reward = sum(
                    step.get("reward", 0)
                    for selected in selected_trace_infos
                    for step in selected
                )
                discussion = SimpleDiscussion()
                sys_msg = self.msg_builder.system().add_text(SYSTEM_PROMPT)
                discussion.append(sys_msg)
                trace_msg = self.msg_builder.user().add_text(hint_prompt)
                discussion.append(trace_msg)

                payload = APIPayload(messages=discussion.flatten())
                response = self.llm(payload)
                cost_so_far = self.llm.stats.stats_dict.get("effective_cost", 0)
                logger.info(f"Cost so far: {cost_so_far}")

                # Extract hint from structured response format
                response_text = response.think if hasattr(response, "think") else str(response)
                think, hint, topic = extract_structured_response(response_text)

                # An autoguide context, when identified this round, replaces the parsed topic
                if context:
                    topic = context

                if topic:
                    hint_topics.append(topic)

                # Log structured response parsing
                if think and hint:
                    logger.info(f"✅ Successfully parsed structured response for task {task_name}")
                    # The topic may be missing even when think and hint were parsed
                    logger.info(
                        f"   Thinking: {len(think)} chars, Hint: {len(hint)} chars, Topic: {len(topic or '')} chars"
                    )
                    if len(hint) > 3000:
                        logger.warning(
                            f"⚠️ Hint is very long ({len(hint)} chars) - consider making it more concise"
                        )
                elif not hint:
                    logger.warning(
                        f"⚠️ No <hint> tags found in response for task {task_name}, using fallback"
                    )
                    # Fallback: whitespace-normalized raw response
                    hint = " ".join(response_text.strip().split())

                # Apply smart truncation if hint is too long
                if len(hint) > 10000:
                    original_length = len(hint)
                    hint = smart_truncate_text(hint, max_length=10000, context="hint")
                    logger.info(
                        f"📝 Hint for task {task_name} was truncated from {original_length} to {len(hint)} characters using smart truncation"
                    )

                # Save chat messages for this hint generation
                self._save_chat_messages(
                    task_name=task_name,
                    seed=task_traces[0].get("seed", "unknown_seed"),
                    trace_info=selected_trace_infos,
                    system_prompt=str(sys_msg),
                    user_prompt=hint_prompt,
                    llm_response=response,
                    metadata={
                        "hint_type": "regular",
                        "n_traces_used": len(selected_trace_infos),
                        "llm_stats": self.llm.stats.stats_dict,
                    },
                    extracted_components={
                        "thinking": think,
                        "hint": hint,
                        "topic": topic,
                    },
                )

                key = (task_name, hint)
                if key in existing or hint in hints_for_this_task:
                    continue
                total_steps = sum(len(selected) for selected in selected_trace_infos)
                new_rows.append(
                    self._new_hint_row(
                        task_name=task_name,
                        task_seed=task_traces[0].get("seed", "unknown_seed"),
                        reward=reward,
                        semantic_keys=topic
                        or f"trace_analysis_{len(selected_trace_infos)}_traces_{total_steps}_steps",
                        hint=hint,
                    )
                )
                # Checkpoint after every new hint so completed LLM work is not lost
                self.save_db(output_path, db, new_rows)
                existing.add(key)
                hints_for_this_task.add(hint)

        # Append new rows and save
        self.save_db(output_path, db, new_rows)

    def _new_hint_row(self, task_name, task_seed, reward, semantic_keys, hint) -> Dict[str, Any]:
        """Build one hint database row keyed by ``HINT_DB_COLUMNS``, stamped with today's date."""
        return {
            "time_stamp": datetime.now().strftime("%b %d"),
            "task_name": task_name,
            "task_seed": task_seed,
            "reward": reward,
            "base_llm": getattr(self.model_args, "model_name", "unknown_llm"),
            "agent_name": self.config.agent_name,
            "domain_name": self.config.domain_name,
            "user_name": self.config.user_name,
            "source": self.config.source,
            "semantic_keys": semantic_keys,
            "hint": hint,
        }

    def _fit_prompt_to_token_budget(self, hint_prompt: str, task_name: str) -> str:
        """Return ``hint_prompt``, truncated when its estimated token count exceeds the limits."""
        estimated_tokens = estimate_token_count(hint_prompt)
        # More aggressive limit to account for system prompt and other parts
        if estimated_tokens > 150000:
            logger.warning(
                f"Generated prompt is too long ({estimated_tokens} tokens). Truncating to fit within limits."
            )

            # Simple approach: truncate the prompt text itself to fit within limits
            max_chars = 150000 * 4  # Convert tokens back to characters (rough estimate)
            if len(hint_prompt) > max_chars:
                # Remember the length before truncating so the log reports it correctly
                original_length = len(hint_prompt)
                hint_prompt = hint_prompt[:max_chars] + "\n... [truncated for length]"
                logger.info(
                    f"Truncated prompt from {original_length} to {max_chars} characters"
                )

        # Always calculate final tokens for logging
        final_tokens = estimate_token_count(hint_prompt)
        logger.info(f"Total estimated tokens (system + user): {final_tokens}")

        if final_tokens > 180000:
            logger.warning(
                f"Total message would be too long ({final_tokens} tokens). Further truncation needed."
            )
            # Truncate more aggressively
            max_chars = 120000 * 4  # Very aggressive truncation
            if len(hint_prompt) > max_chars:
                hint_prompt = (
                    hint_prompt[:max_chars] + "\n... [further truncated for total length]"
                )
                logger.info(f"Further truncated prompt to {max_chars} characters")
                # Recalculate final tokens after further truncation
                final_tokens = estimate_token_count(hint_prompt)

        # Log final prompt length
        logger.info(f"Final hint prompt for task {task_name}: {final_tokens} tokens")
        return hint_prompt

    def save_db(self, output_path, db, new_rows):
        if new_rows:
            save_db = pd.concat([db, pd.DataFrame(new_rows)], ignore_index=True)
            save_db = save_db[self.HINT_DB_COLUMNS]  # ensure column order
            # Save with proper CSV formatting to handle quotes and special characters
            logger.info(f"Saving hint database to {output_path}")
            save_db.to_csv(output_path, index=False, quoting=1)  # QUOTE_ALL
            logger.info(
                f"Hint database updated with {len(new_rows)} new rows (total {len(save_db)})."
            )
        else:
            logger.info(f"No new hints to add. Database at {output_path} is up to date.")

    def get_action(self, obs):
        """Always return the ``noop()`` action with a placeholder AgentInfo; ``obs`` is ignored."""
        placeholder_info = bgym.AgentInfo(think="nope", chat_messages=[], stats={})
        return "noop()", placeholder_info


class HintsMiner:
    """
    Thin driver that builds a hint database from execution traces via JephHinter.
    """

    def __init__(self, config: HintsMinerConfig):
        """Store ``config`` and copy its paths onto the nested hinter config.

        ``config.root_dir`` becomes ``hinter_config.traces_folder`` and
        ``config.output_path`` becomes ``hinter_config.hint_db_path``; the
        nested config object is modified in place.
        """
        self.config = config
        hinter_cfg = self.config.hinter_config
        hinter_cfg.traces_folder = config.root_dir
        hinter_cfg.hint_db_path = config.output_path

    def run(self):
        """Create a JephHinter from the stored config and build the hint database."""
        cfg = self.config
        logger.info(f"Starting hint mining from: {cfg.root_dir}")
        logger.info(f"Output will be saved to: {cfg.output_path}")
        hinter = JephHinter(model_args=cfg.model_args, config=cfg.hinter_config)
        hinter.build_hint_db(output_path=cfg.output_path)
        logger.info("Hint database update complete.")


# Command-line entry point: mine hints from a traces directory into a CSV hint database.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Generate hints from execution traces")
    parser.add_argument(
        "--traces-dir",
        "-t",
        default="/home/toolkit/agentlab_results/2025-08-28_19-04-58-jephhinter-genericagent-gpt-5-nano-2025-08-07_no_hints",
        help="Directory containing execution traces (step_*.pkl.gz files)",
    )
    parser.add_argument(
        "--output",
        "-o",
        default="hint_db_updated.csv",
        help="Output path for hint database (default: hint_db_updated.csv)",
    )
    # The default is None so that an explicit --model can be told apart from
    # "not given"; the model actually used is the fixed configuration below.
    parser.add_argument(
        "--model",
        "-m",
        default=None,
        help="LLM model to use for hint generation (default: claude-4-sonnet-20250514)",
    )

    args = parser.parse_args()

    # Imported here so that merely importing this module does not pull in the
    # tool-use agent package.
    from agentlab.agents.tool_use_agent.tool_use_agent import CLAUDE_SONNET_4

    # Only this predefined model configuration is wired up for now.
    model_args = CLAUDE_SONNET_4

    # --model is not mapped to a model configuration yet; tell the user instead
    # of silently running a different model than the one requested.
    if args.model is not None and args.model != model_args.model_name:
        print(
            f"Warning: --model {args.model} is not supported yet and is ignored; "
            f"using {model_args.model_name} instead."
        )

    # Create config
    config = HintsMinerConfig(
        root_dir=args.traces_dir,
        output_path=args.output,
        model_args=model_args,
        hinter_config=AutoGuideConfig_workarena_l1(),
    )

    print(f"Generating hints from: {args.traces_dir}")
    print(f"Output will be saved to: {args.output}")
    print(f"Using model: {model_args.model_name}")

    miner = HintsMiner(config)
    miner.run()

    print(f"✅ Hints generated successfully! Check {args.output}")
