import json, random, argparse
from pydantic import BaseModel
from tqdm import trange
from openai import OpenAI
import os

# Load the OpenAI API key once, at import time.
# Surrounding whitespace is stripped because key files normally end with a
# newline, and that newline must not become part of the credential handed to
# the client.
with open("./data/data/openai.key", "r", encoding="utf-8") as f:
    OPENAI_API_KEY = f.read().strip()

# Structured-output schema for one generated reasoning passage; it is passed
# as ``response_format`` to the chat-completions ``parse`` call in this file.
# NOTE: documented with comments rather than a docstring on purpose - pydantic
# copies a model's docstring into the JSON schema "description", which would
# change the schema sent with the request.
class SingleThink(BaseModel):
    think: str  # text of the generated think sequence

class RandomWalkWanderer:
    """Build synthetic "random walk" search traces with LLM-written reasoning.

    For each eligible query id, a random subset of its recorded search turns
    is drawn in random order, and an OpenAI chat model is asked to write a
    "think" passage leading to each turn's search query.

    Attributes:
        turns_by_qid: ``data["turns"]`` from the input JSON; indexed by query
            id, each value is iterated as a sequence of per-turn dicts.
        user_query_by_qid: ``data["user_query"]`` from the input JSON.
        max_turns: Maximum number of turns placed in one generated sample.
        below5_thr: Threshold applied to ``score["below5"]`` when filtering.
        min_samples_thr: Minimum number of recorded turns a query must have.
        query_ids: Query ids that passed ``_filter_query_ids``.
        cursor: Index into ``query_ids`` of the next query to generate.
        client: OpenAI client used for the completion calls.
        reflection_templates: Example sentences optionally appended to the
            prompt from the third turn onwards.
    """

    # Words ignored by ``_extract_keywords_from_query``.
    _STOP_WORDS = frozenset({
        'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for',
        'of', 'with', 'by', 'how', 'what', 'when', 'where', 'why', 'is',
        'are', 'was', 'were',
    })

    def __init__(self, path="./data/data/preprocessed/data.json", max_turns=7, below5_thr=1.0, min_samples_thr=10, seed=0):
        """Load the preprocessed data, filter query ids and create the client.

        Args:
            path: JSON file with top-level ``"turns"`` and ``"user_query"``
                mappings.
            max_turns: Upper bound on the number of turns per generated sample.
            below5_thr: Queries whose ``score["below5"]`` exceeds this value
                are dropped.
            min_samples_thr: Queries with fewer recorded turns than this are
                dropped.
            seed: Seed applied to the module-level ``random`` generator.
        """
        # NOTE: this seeds the process-wide RNG, not a private one, so it also
        # affects any other code using the ``random`` module.
        random.seed(seed)
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
        self.turns_by_qid = data["turns"]
        self.user_query_by_qid = data["user_query"]
        self.max_turns = max_turns
        self.below5_thr = below5_thr
        self.min_samples_thr = min_samples_thr
        self.query_ids = self._filter_query_ids()
        self.cursor = 0
        self.client = OpenAI(api_key=OPENAI_API_KEY)

        # Randomized cross-turn reflection templates for random walk behavior.
        # NOTE(review): the {best_turn}/{worst_turn} placeholders are never
        # filled in anywhere in this class; the templates reach the prompt
        # verbatim as pattern examples. Confirm that this is intended.
        self.reflection_templates = [
            "Turn {best_turn} yielded interesting documents compared to Turn {worst_turn} - but that's the beauty of random exploration, each path reveals unique facets worth investigating regardless of which direction seems more directly relevant.",

            "Comparing my wandering so far, Turn {best_turn} uncovered some compelling content, whereas Turn {worst_turn}'s approach led me down an entirely different rabbit hole - both have value in this exploratory journey of discovery.",

            "Reflecting on my random walk, Turn {best_turn} brought me documents that felt more directly relevant, while Turn {worst_turn} took me on an interesting tangent - sometimes wandering reveals unexpected connections worth pursuing.",

            "Looking back at my exploration, Turn {best_turn} seemed to hit something substantial, contrasting with Turn {worst_turn} which wandered into different territory - both serve the random walk process of diverse discovery.",

            "My stochastic search so far shows Turn {best_turn} resonated well with available content, while Turn {worst_turn}'s strategy took me somewhere completely different - random walks thrive on this kind of diverse exploration pattern.",

            "Analyzing my meandering path, Turn {best_turn} seemed to strike a chord with the document corpus, whereas Turn {worst_turn} led me down an alternative route - this diversity is exactly what makes random exploration valuable for discovery.",
        ]

    def _filter_query_ids(self):
        """Return the query ids that pass the score and sample-count filters.

        A query is dropped when its user-query record is a dict carrying a
        non-``None`` ``score["below5"]`` greater than ``below5_thr``, or when
        it has fewer than ``min_samples_thr`` recorded turns.
        """
        kept = []
        for qid in self.turns_by_qid:
            uq = self.user_query_by_qid[qid]
            if isinstance(uq, dict) and "score" in uq and uq["score"].get("below5") is not None:
                if uq["score"]["below5"] > self.below5_thr:
                    continue
            if len(self.turns_by_qid[qid]) < self.min_samples_thr:
                continue
            kept.append(qid)
        return kept

    def _user_query_text(self, qid):
        """Return the query text for ``qid``.

        When the stored record is a dict with a ``"user_query"`` key, that
        value is returned; otherwise the record itself is returned unchanged.
        """
        uq = self.user_query_by_qid[qid]
        return uq["user_query"] if isinstance(uq, dict) and "user_query" in uq else uq

    def _select_search_trace(self, qid, k=7):
        """Draw up to ``k`` recorded turns of ``qid`` in random order.

        Returns:
            A list of ``(search_query, think, top_k_results, best_cosine,
            best_rank)`` tuples. A missing or falsy ``search_query`` becomes
            ``""`` and a missing or falsy ``top_k_results`` becomes ``[]``.
        """
        candidates = [
            (
                t.get("search_query") or "",
                t.get("think"),
                t.get("top_k_results") or [],
                t.get("best_cosine"),
                t.get("best_rank"),
            )
            for t in self.turns_by_qid[qid]
        ]
        selected = random.sample(candidates, k=min(k, len(candidates)))
        # Random shuffle - no strategic progression for random walk.
        # Kept even though sample() is already unordered, so that the sequence
        # of RNG draws (and therefore seeded output) stays the same.
        random.shuffle(selected)
        return selected

    def _extract_keywords_from_query(self, query_text):
        """Return up to four lower-cased keywords taken from ``query_text``.

        Double quotes are removed, the text is split on whitespace, and words
        of two characters or fewer or listed in ``_STOP_WORDS`` are discarded.
        """
        words = query_text.lower().replace('"', '').split()
        keywords = [w for w in words if len(w) > 2 and w not in self._STOP_WORDS]
        return keywords[:4]

    def _format_prev_docs(self, docs):
        """Render ``docs`` as a numbered, triple-quoted list for the prompt.

        Non-string entries are skipped but still consume a number, so the
        numbering reflects each document's position in ``docs``.
        """
        return "\n\n".join(
            f'{i}. """{d}"""'
            for i, d in enumerate(docs, 1)
            if isinstance(d, str)
        )

    def _generate_single_think(self, q_text, turn_idx, search_query, prev_docs, original_thinking, search_history):
        """Ask the model for the think sequence of one turn.

        Args:
            q_text: The user's original query.
            turn_idx: Zero-based index of the turn being generated.
            search_query: Search query this turn's reasoning must lead to.
            prev_docs: Documents retrieved in the previous turn; only added
                to the prompt when ``turn_idx > 0``.
            original_thinking: Recorded thinking for this turn; only added to
                the prompt when ``turn_idx > 0``.
            search_history: ``(query, think)`` pairs of the earlier turns.

        Returns:
            The ``think`` string of the parsed ``SingleThink`` response.

        Raises:
            ValueError: If the response carries no parsed object.
        """
        system_prompt = """You are generating synthetic training data for a random walk search agent that combines stochastic exploration with self-reflective learning. This is a DATA GENERATION process where each think sequence represents the internal reasoning of an intelligent search agent that learns through analyzing retrieved document content while following curiosity-driven exploration patterns.

CRITICAL RULES FOR RANDOM WALK THINK SEQUENCE GENERATION:
1. This think sequence corresponds to turn {turn_num} and MUST produce the search query for this specific turn
2. Think sequences must demonstrate EXPLORATORY LEARNING - combining curiosity-driven wandering with analysis of retrieved content
3. Turn 1 is EXPLORATORY START: "Given the user query, I'm curious to explore..." with natural wandering approach
4. Turn 2+ are REFLECTIVE + WANDERING: Start with 1-2 sentences analyzing previous retrieved results, then curiosity-driven planning for diverse exploration
5. CROSS-TURN EXPLORATION: For turns 3+, compare how different exploratory approaches yielded different types of content using specific turn numbers
6. WANDERING TREND AWARENESS: Acknowledge which exploration directions revealed interesting content while maintaining exploratory spirit
7. STRATEGIC CURIOSITY: Reference specific search terms that opened interesting avenues while planning diverse exploration paths

STRICTLY FORBIDDEN - NEVER MENTION:
- Any numerical scores, float values, or quantitative measurements
- Ranks, positions, or numerical performance indicators
- Any external evaluation metrics - all analysis must be content-based

THE AGENT CAN ONLY SEE AND REASON ABOUT:
- The user's original query  
- The actual text content of documents retrieved in previous turns
- The search queries it previously used
- NOTHING ELSE - no numerical feedback

PSEUDO-REWARD GENERATION THROUGH CONTENT ANALYSIS:
The agent evaluates exploration effectiveness by comparing document content diversity and relevance:
- "These documents revealed interesting aspects of [topic] that I hadn't considered before"  
- "This exploration direction uncovered a different facet - let me continue wandering in adjacent areas"
- "The content here opens up new avenues for exploration I want to pursue"
- "This tangent led to documents with unexpected connections worth investigating further"
- "My wandering in Turn X revealed rich content about [specific aspect], suggesting this exploration path has potential"

REFLECTIVE REASONING PATTERNS (for turns 2+):
- Analyze retrieved content diversity and topical coverage from previous exploration
- Compare how different wandering approaches revealed different aspects using specific content observations  
- Identify WHY certain exploration directions yielded interesting content through content analysis
- Create internal exploration signals: "This worked because the documents contained diverse perspectives on..." 
- When documents reveal interesting content: "My wandering uncovered fascinating material about..."
- When documents suggest new directions: "These results make me curious about adjacent areas like..."
- Reference specific previous explorations: "Turn X revealed interesting content about [topic], now I want to explore [related area]"
- Show learning from content patterns while maintaining exploratory curiosity
- For turns 3+, reflect on specific turn exploration outcomes: "Turn X's exploration of [keywords] revealed [content type], while Turn Y with [keywords] uncovered different material"

DETAILED TURN-WISE EXPLORATION ANALYSIS:
- Use specific turn references: "Turn 2's wandering approach revealed documents about...", "Comparing Turn 1 and Turn 3 exploration results..."
- Provide concrete content diversity analysis: "Turn 2 uncovered technical aspects while Turn 1 revealed general principles"
- Trace exploration evolution: "Moving from Turn 1's broad exploration to Turn 2's tangential focus revealed different content types"
- Acknowledge exploration trajectories: "My wandering has revealed increasingly diverse perspectives since Turn 1"
- Reference keyword exploration effectiveness: "The exploration using [keyword] in Turn 2 opened interesting avenues compared to [other keyword] in Turn 1"
- Show curiosity-driven learning: "Based on Turn 2's interesting discoveries, I want to explore related but different areas"

SEARCH QUERY EVOLUTION FOR RANDOM WALKS:
- The think sequence must construct reasoning that naturally leads to the target search query
- Show curiosity-driven logic that causally connects to the specific search terms being produced
- Frame exploration decisions in terms of the actual query being generated
- Maintain random walk narrative while ensuring semantic coherence with target search query

Generate think sequences that read like internal monologues of a curious intelligent agent learning to explore through content analysis while maintaining random walk exploration principles."""

        # Build context based on previous turns
        context_prompt = f"""Generate 1 coherent exploratory thinking sequence for this turn:

USER QUERY: {q_text}

TURN {turn_idx + 1}:
- Will produce search query: "{search_query}"
"""

        if turn_idx > 0:
            context_prompt += f"- Documents retrieved from previous exploration: {self._format_prev_docs(prev_docs)}\n"
            context_prompt += f"- Reference original thinking (for inspiration): {original_thinking}\n"

        if search_history:
            context_prompt += "\nPREVIOUS EXPLORATION HISTORY (for cross-turn analysis):\n"
            for turn_no, (prev_query, prev_think) in enumerate(search_history, 1):
                context_prompt += f'Turn {turn_no}: Query="{prev_query}", Thinking="{prev_think}"\n'

        # Add cross-turn reflection examples for random walk exploration.
        # The RNG is only consulted from the third turn on (short-circuit).
        if turn_idx >= 2 and random.random() < 0.5:
            context_prompt += f"\nCROSS-TURN EXPLORATION EXAMPLE (use similar pattern when relevant):\n{random.choice(self.reflection_templates)}\n"

        context_prompt += """
GENERATION REQUIREMENTS:
- Turn 1: Initial curiosity-driven exploration strategy based on user query understanding
- Turn 2+: Start with content analysis reflection from previous exploration, then curiosity-driven wandering planning
- Turn 3+: Occasionally compare exploration approaches from previous turns using specific turn numbers
- Create pseudo-rewards through content diversity assessment: analyze document variety, topic coverage, interesting discoveries
- Reference previous turns that revealed interesting content when planning new explorations
- Demonstrate learning from content patterns while maintaining exploratory wandering spirit
- Use keywords strategically based on observed content diversity and interesting discoveries
- Generate realistic wandering agent reasoning that leads naturally to the target search query through curiosity-driven reflection

Generate a single think sequence that demonstrates natural exploration progression with cross-turn content analysis."""

        response = self.client.beta.chat.completions.parse(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": system_prompt.format(turn_num=turn_idx + 1)},
                {"role": "user", "content": context_prompt}
            ],
            response_format=SingleThink
        )

        parsed = response.choices[0].message.parsed
        if parsed is None:
            # Fail with an explicit message instead of an AttributeError on
            # ``None.think`` when no structured object came back.
            raise ValueError("completion contained no parsed SingleThink object")
        return parsed.think

    def generate_sample(self):
        """Generate one synthetic sample for the query at ``self.cursor``.

        The cursor advances by one per call and wraps to the first query once
        it runs past the end of ``query_ids``.

        Returns:
            ``None`` when no query ids passed the filters; otherwise a dict
            with ``query_id``, ``user_query``, ``sequence`` (alternating
            ``think`` and ``search_query`` entries, at most ``max_turns`` of
            each) and ``behavior_type``.
        """
        if not self.query_ids:
            return None
        if self.cursor >= len(self.query_ids):
            self.cursor = 0
        qid = self.query_ids[self.cursor]
        self.cursor += 1

        q_text = self._user_query_text(qid)
        # Honor the configured turn limit; the trace never exceeds it.
        search_trace = self._select_search_trace(qid, k=self.max_turns)

        synthetic_sequence = []
        search_history = []

        for t, (s_t, th_t, R_t, cos_t, rank_t) in enumerate(search_trace):
            # Get previous turn documents for context
            prev_docs = search_trace[t - 1][2] if t > 0 else []

            # Generate think sequence for this specific turn
            think_t = self._generate_single_think(
                q_text=q_text,
                turn_idx=t,
                search_query=s_t,
                prev_docs=prev_docs,
                original_thinking=th_t,
                search_history=search_history
            )

            synthetic_sequence.append({"tag": "think", "messages": think_t})
            synthetic_sequence.append({
                "tag": "search_query",
                "text": s_t,
                "top_k_results": R_t,
                "best_cosine": cos_t,
                "best_rank": rank_t
            })

            # Add this turn to history for future turns
            search_history.append((s_t, think_t))

        return {
            "query_id": qid,
            "user_query": q_text,
            "sequence": synthetic_sequence,
            "behavior_type": "random_walk_wanderer"
        }
    
if __name__ == '__main__':
    # Command-line entry point: generate one sample per index in
    # [start_index, end_index) and append each as a JSON line to
    # generated_samples_random_walk_<start>_<end>.jsonl.
    import sys

    parser = argparse.ArgumentParser()
    parser.add_argument('--start_index', type=int, required=True, help='Starting index for generation')
    parser.add_argument('--end_index', type=int, required=True, help='Ending index for generation')
    args = parser.parse_args()

    obj = RandomWalkWanderer()
    # Start from the requested position in the filtered query list.
    obj.cursor = args.start_index
    output_file = f'generated_samples_random_walk_{args.start_index}_{args.end_index}.jsonl'

    # Append mode, so re-running with the same range adds to earlier output.
    with open(output_file, 'a', encoding='utf-8') as f:
        for i in trange(args.start_index, args.end_index):
            try:
                result = obj.generate_sample()
                if result is None:
                    # No query passed the filters; every further call would
                    # return None as well, so stop instead of writing "null".
                    print("no eligible queries; nothing to generate", file=sys.stderr)
                    break
                f.write(json.dumps(result) + '\n')
                # Flush per sample so finished work survives an interruption.
                f.flush()
            except Exception as exc:
                # Best-effort: report the failed sample and keep going.
                # ``Exception`` (not a bare except) lets Ctrl-C and SystemExit
                # stop the run.
                print(f"sample {i} failed: {exc!r}", file=sys.stderr)
                continue