import sys
sys.path.append('../')
import json
import time
import os
import uuid
import argparse
from typing import List, Dict, Any, Optional, Set
from pydantic import BaseModel
import MentalModel
from MentalModel import (tag_message, update_or_create_hypothesis, 
    evaluate_confidence, process_future_forward_hypotheses, HypothesisAction,
    ContentTag, ContentTags, tag_n_upsert
)
from LLMAgent import LLMBaseAgent
from faiss_store import FaissStore
from tqdm import tqdm
from collections import defaultdict
from MentalModelTypes import Hypothesis, Message, Evidence
# Change: Still import from MessageGenerator, but the interface has removed party_b_behavior and added party_b_desired_info
from MessageGenerator import generate_simple_message, select_best_message_with_rsa, generate_messages_ranked_by_committee


class ConversationTurn(BaseModel):
    """Record of one conversation turn plus the diagnostics captured while producing it.

    The diagnostic field names below match keys of the dict returned by
    `process_hypothesis_updates_detailed`; presumably they are filled from that
    dict by the conversation loop -- confirm against the caller.

    NOTE(review): the `[]` / `{}` defaults rely on pydantic copying mutable
    defaults per instance rather than sharing them -- confirm for the pydantic
    version in use.
    """
    # Position of the turn within the conversation.
    turn_number: int
    # Who produced `message` (the rest of this file uses 'Party A' / 'Party B').
    speaker: str
    # Text of the utterance for this turn.
    message: str
    # Serialized hypotheses associated with this turn (dict form).
    hypotheses_snapshot: List[Dict] = []
    # Serialized hypotheses that were created/updated during this turn.
    updated_hypotheses: List[Dict] = []
    
    # Detailed hypothesis processing information
    tagged_chunks: List[Dict] = []  # Tagged message chunks ({"dimension", "text"} entries)
    chunk_processing_details: List[Dict] = []  # Per-chunk processing record (decision, resulting hypothesis, timing estimate)
    hypothesis_decisions: List[Dict] = []  # Serialized HypothesisAction decisions
    candidate_hypotheses_searched: List[Dict] = []  # Candidate-search records for each processed chunk
    confidence_evaluations: List[Dict] = []  # Confidence score and evidence summary per hypothesis
    future_forward_processing: List[Dict] = []  # Records for future-forward (dimension 4-6) hypotheses
    hypothesis_evidence: List[List[Dict]] = []  # Serialized evidence list for each updated hypothesis
    
    # Timing and performance info (all in milliseconds)
    processing_time_ms: float = 0.0
    hypothesis_update_time_ms: float = 0.0
    message_generation_time_ms: float = 0.0

    # RSA analysis data (when RSA/committee used for Party A); empty dict otherwise
    rsa_analysis: Dict[str, Any] = {}


class ConversationResult(BaseModel):
    """Full outcome of one simulated scenario: scenario metadata, every turn, and final state."""
    # Identifier of the scenario this conversation was generated from.
    scenario_id: int
    # Scenario participants and their relationship (normalized scenario fields).
    party_a: str
    party_b: str
    relationship: str
    # Free-text description of the situation the conversation takes place in.
    background_context: str
    # Aspect/sensitivity entries; when built from dict-style seed data each inner
    # list is [aspect, sensitivity]. Legacy list-of-lists input is stored as given.
    aspect_sensitivities: List[List[str]]
    # Ordered per-turn records.
    turns: List[ConversationTurn]
    # Serialized hypotheses held at the end of the conversation.
    final_hypotheses: List[Dict]
    # Aggregate information about the run (schema defined by the producer -- not visible here).
    conversation_summary: Dict[str, Any]


def load_data(file_path: str) -> List[Dict]:
    """Parse the UTF-8 JSON file at ``file_path`` and return its decoded content."""
    with open(file_path, mode='r', encoding='utf-8') as handle:
        return json.load(handle)

def load_processed_ids(files: List[str]) -> Set[int]:
    """Collect the ``scenario_id`` values already present in JSONL result files.

    Reading is best-effort: blank or missing paths, unreadable files, blank
    lines, lines that are not valid JSON, and records that are not JSON objects
    are all skipped so that one bad line never hides the remaining records of
    the same file.

    Args:
        files: Paths of JSONL files (one JSON object per line). Falsy entries
            are ignored.

    Returns:
        The set of integer ``scenario_id`` values found across all files.
    """
    processed: Set[int] = set()
    for path in files:
        # Nothing to read for blank entries or files that do not exist yet.
        if not path or not os.path.exists(path):
            continue
        try:
            with open(path, 'r', encoding='utf-8') as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        obj = json.loads(line)
                    except (ValueError, RecursionError):
                        # Malformed (or absurdly nested) line: skip just this line.
                        continue
                    if not isinstance(obj, dict):
                        # A valid JSON scalar/array has no scenario_id; skipping it
                        # here keeps the rest of the file readable.
                        continue
                    sid = obj.get('scenario_id')
                    # bool is a subclass of int; `true` must not count as scenario 1.
                    if isinstance(sid, int) and not isinstance(sid, bool):
                        processed.add(sid)
        except (OSError, UnicodeError):
            # Unreadable or undecodable file: keep what was collected and move on.
            continue
    return processed


def extract_scenario_fields(s: Dict[str, Any]) -> Dict[str, Any]:
    """Flatten a seed-scenario record into the field set the pipeline works with.

    Core fields (parties, relationship, background context) are taken from the
    nested ``scenario`` object when it holds a non-blank string, otherwise from
    the top level. Party backgrounds are looked up top-level first (with
    ``*_description`` as an alias), then inside ``scenario``. No synthetic
    defaults are invented: a required field that cannot be found raises.

    Returns:
        Dict with keys ``party_a``, ``party_b``, ``relationship``,
        ``background_context``, ``party_a_background``, ``party_b_background``,
        ``party_b_desired_info`` (optional, passed through as found) and
        ``aspect_sensitivities``.

    Raises:
        ValueError: if any required text field is blank or no aspect
            sensitivities could be derived.
    """
    nested = s.get('scenario', {}) or {}

    def _pick(*options: Any) -> str:
        # First option that is a string with visible content, stripped; '' if none.
        return next(
            (opt.strip() for opt in options if isinstance(opt, str) and opt.strip() != ""),
            "",
        )

    # Aspect sensitivities may live under a legacy key or the newer `aspect_sensitivities_a`.
    raw_aspects = (
        s.get('first_half_aspect_sensitivities')
        or s.get('aspect_sensitivities')
        or s.get('aspect_sensitivities_a')
        or []
    )
    aspect_sensitivities: Any = []
    if isinstance(raw_aspects, list):
        if not raw_aspects or isinstance(raw_aspects[0], list):
            # Already in list-of-lists form (or empty): use as-is.
            aspect_sensitivities = raw_aspects
        elif isinstance(raw_aspects[0], dict):
            # Dict entries are flattened to [aspect, sensitivity] pairs.
            aspect_sensitivities = []
            for entry in raw_aspects:
                fallback_level = str(entry.get('sensitivity', ''))
                aspect_sensitivities.append(
                    [entry.get('aspect', ''), entry.get('sensitivity_scale', fallback_level)]
                )

    fields = {
        'party_a': _pick(nested.get('party_a'), s.get('party_a')),
        'party_b': _pick(nested.get('party_b'), s.get('party_b')),
        'relationship': _pick(nested.get('relationship'), s.get('relationship')),
        'background_context': _pick(nested.get('background_context'), s.get('background_context')),
        'party_a_background': _pick(
            s.get('party_a_background'),
            s.get('party_a_description'),
            nested.get('party_a_background'),
            nested.get('party_a_description'),
        ),
        'party_b_background': _pick(
            s.get('party_b_background'),
            s.get('party_b_description'),
            nested.get('party_b_background'),
            nested.get('party_b_description'),
        ),
        # Optional: read straight from the record and never reported as missing.
        'party_b_desired_info': s.get('party_b_desired_info', []),
        'aspect_sensitivities': aspect_sensitivities,
    }

    # Required text fields are checked in declaration order; the two list-valued
    # entries are handled separately.
    exempt = ('aspect_sensitivities', 'party_b_desired_info')
    absent = []
    for name, value in fields.items():
        if name in exempt:
            continue
        if value is None or (isinstance(value, str) and value.strip() == ""):
            absent.append(name)
    if not isinstance(aspect_sensitivities, list) or len(aspect_sensitivities) == 0:
        absent.append('aspect_sensitivities')

    if absent:
        raise ValueError(f"Missing required scenario fields ({', '.join(absent)}) for scenario_id={s.get('scenario_id', 'unknown')}")

    return fields

# Change: Function signature removes party_b_behavior, adds party_b_desired_info, and passes it to generate_simple_message
def generate_party_b_reply(
    party_a: str,
    party_b: str, 
    relationship: str,
    background_context: str,
    conversation_history: List[Dict],
    party_a_message: str,
    agent: LLMBaseAgent,
    party_a_background: str,
    party_b_background: str,
    party_b_desired_info: Optional[List[Dict[str, Any]]] = None,
) -> str:
    """Produce Party B's next reply via `generate_simple_message`.

    Party B is the plain responder: no hypotheses are supplied and no RSA or
    committee ranking is involved. The transcript handed to the generator is
    the prior history (reduced to speaker/content) followed by Party A's latest
    message; `conversation_history` itself is left untouched.
    """
    # Rebuild the history with only the two keys passed on to the generator,
    # then close it with the message Party B is answering.
    transcript = [
        {'speaker': entry['speaker'], 'content': entry['content']}
        for entry in conversation_history
    ]
    transcript.append({'speaker': 'Party A', 'content': party_a_message})

    reply = generate_simple_message(
        agent=agent,
        party_a=party_a,
        party_b=party_b,
        relationship=relationship,
        background_context=background_context,
        party_a_background=party_a_background,
        party_b_background=party_b_background,
        current_speaker='Party B',
        other_speaker='Party A',
        prev_convo=transcript,
        hypotheses=None,  # Party B replies without any hypotheses
        is_party_b=True,
        party_b_desired_info=party_b_desired_info,
    )
    return reply


def process_hypothesis_updates_detailed(
    party_a: str,
    party_b: str,
    relationship: str,
    background_context: str,
    earlier_conversation: List[Dict],
    last_message: Dict,
    hyp_faiss: FaissStore,
    hyp2evi: Dict[str, List],
    agent: LLMBaseAgent
) -> Dict[str, Any]:
    """
    Run MentalModel.tag_n_upsert for one message against a scenario-specific
    store and return a detailed, serializable record of what happened.

    The function temporarily points the module-level `MentalModel.HypFaiss` and
    `MentalModel.Hyp2Evi` at the supplied `hyp_faiss` / `hyp2evi`, and always
    restores the previous values (also when an exception is raised, which then
    propagates to the caller).

    After the upsert, every hypothesis stored in dimensions 1-6 that has
    evidence in `hyp2evi` is passed to `evaluate_confidence`; changed scores
    are written back to `hyp_faiss`.

    Returns a dict with the keys: "tagged_chunks", "chunk_processing_details",
    "hypothesis_decisions", "candidate_hypotheses_searched",
    "confidence_evaluations", "future_forward_processing",
    "updated_hypotheses", "hypothesis_evidence", "all_current_hypotheses" and
    "timing" (millisecond timings plus understanding/future hypothesis counts).
    """
    start_time = time.time()
    
    # Store original global state
    original_hypfaiss = MentalModel.HypFaiss
    original_hyp2evi = MentalModel.Hyp2Evi
    
    # Switch to scenario-specific mental model context
    MentalModel.HypFaiss = hyp_faiss
    MentalModel.Hyp2Evi = hyp2evi
    
    try:
        # Use MentalModel.tag_n_upsert which handles the complete workflow (now with candidate→score→upsert)
        processing_start = time.time()
        
        # The three results are consumed in lockstep below (zip), i.e. they are
        # treated as parallel sequences: one decision, hypothesis and evidence
        # list per processed item.
        decisions, hypotheses, evidence_lists = tag_n_upsert(
            party_a=party_a,
            party_b=party_b,
            relationship=relationship,
            background_context=background_context,
            earlier_conversation=earlier_conversation,
            last_message=last_message,
            agent=agent
        )
        processing_time = (time.time() - processing_start) * 1000
        
        # Extract detailed information for analysis
        hypothesis_decisions = []
        chunk_processing_details = []
        candidate_hypotheses_searched = []
        confidence_evaluations = []
        future_forward_processing = []
        tagged_chunks = []
        
        # Only the lengths of these two lists are reported (in "timing").
        understanding_decisions = []
        future_decisions = []
        
        # Separate understanding and future-forward decisions/hypotheses
        # (the enumerate index `i` is not used in the loop body)
        for i, (decision, hypothesis, evidence_list) in enumerate(zip(decisions, hypotheses, evidence_lists)):
            # Record decision
            hypothesis_decisions.append(decision.model_dump())
            
            # Record confidence evaluation details
            conf_eval = {
                "hypothesis_id": hypothesis.hypothesis_id,
                "dimension_id": hypothesis.dimension_id,
                "confidence_score": hypothesis.confidence,
                "evidence_count": len(evidence_list),
                "evidence_texts": [ev.text for ev in evidence_list]
            }
            confidence_evaluations.append(conf_eval)
            
            # Separate understanding (1,2,3) from future-forward (4,5,6) hypotheses
            if hypothesis.dimension_id in [1, 2, 3]:
                understanding_decisions.append((decision, hypothesis, evidence_list))
                
                # For understanding hypotheses, reconstruct chunk information (approximate):
                # the chunk text is taken from the first evidence item and the time is
                # the total tag_n_upsert time divided evenly across all decisions.
                # NOTE(review): "chunk_index" is len(tagged_chunks), which does not
                # advance when evidence_list is empty, so indices can repeat.
                chunk_info = {
                    "chunk_index": len(tagged_chunks),
                    "dimension_id": hypothesis.dimension_id,
                    "chunk_text": evidence_list[0].text if evidence_list else "",
                    "processing_time_ms": processing_time / max(1, len(decisions)),  # Estimate
                    "decision": decision.model_dump(),
                    "resulting_hypothesis": hypothesis.to_dict(),
                    "evidence_count": len(evidence_list)
                }
                chunk_processing_details.append(chunk_info)
                
                # Add to tagged chunks for compatibility
                if evidence_list:
                    tagged_chunks.append({
                        "dimension": hypothesis.dimension_id,
                        "text": evidence_list[0].text
                    })
                
                # Candidate search info (placeholder note): the two string values
                # are fixed markers, not real counts or candidate lists.
                candidates_info = {
                    "dimension_id": hypothesis.dimension_id,
                    "chunk_index": len(candidate_hypotheses_searched),
                    "chunk_text": evidence_list[0].text if evidence_list else "",
                    "candidate_count": "handled_by_candidate_pipeline",
                    "candidates": "generated_and_scored"
                }
                candidate_hypotheses_searched.append(candidates_info)
                
            elif hypothesis.dimension_id in [4, 5, 6]:
                future_decisions.append((decision, hypothesis, evidence_list))
                
                # Record future-forward processing
                future_info = {
                    "dimension_id": hypothesis.dimension_id,
                    "decision": decision.model_dump(),
                    "hypothesis": hypothesis.to_dict(),
                    "evidence_count": len(evidence_list),
                    "confidence": hypothesis.confidence
                }
                future_forward_processing.append(future_info)
        
        # Re-evaluate confidence for every stored hypothesis that has evidence
        confidence_reevaluation_start = time.time()
        
        # Walk all six dimensions of the scenario store; each hypothesis ends up
        # in all_updated_hypotheses whether or not its confidence changed.
        all_updated_hypotheses = []
        for dim in [1, 2, 3, 4, 5, 6]:
            dim_hyps = hyp_faiss.list_all_one(dim)
            for hyp in dim_hyps:
                # Get all evidence for this hypothesis
                hyp_evidence = hyp2evi.get(hyp.hypothesis_id, [])
                if hyp_evidence:
                    # Re-evaluate confidence using MentalModel.evaluate_confidence
                    # (one call per hypothesis with evidence; presumably an LLM
                    # call since `agent` is passed -- confirm)
                    new_confidence = evaluate_confidence(hyp, hyp_evidence, agent)
                    if new_confidence != hyp.confidence:
                        hyp.update_confidence(new_confidence)
                        hyp_faiss.update(hyp)
                
                all_updated_hypotheses.append(hyp.to_dict())
        
        confidence_reevaluation_time = (time.time() - confidence_reevaluation_start) * 1000
        
    finally:
        # Restore global context (ensuring clean state for other operations)
        MentalModel.HypFaiss = original_hypfaiss
        MentalModel.Hyp2Evi = original_hyp2evi
    
    total_time = (time.time() - start_time) * 1000
    
    # "updated_hypotheses" / "hypothesis_evidence" cover only what tag_n_upsert
    # returned for this message; "all_current_hypotheses" is the whole store.
    return {
        "tagged_chunks": tagged_chunks,
        "chunk_processing_details": chunk_processing_details,
        "hypothesis_decisions": hypothesis_decisions,
        "candidate_hypotheses_searched": candidate_hypotheses_searched,
        "confidence_evaluations": confidence_evaluations,
        "future_forward_processing": future_forward_processing,
        "updated_hypotheses": [h.to_dict() for h in hypotheses],
        "hypothesis_evidence": [[e.to_dict() for e in ev_list] for ev_list in evidence_lists],
        "all_current_hypotheses": all_updated_hypotheses,
        "timing": {
            "total_time_ms": total_time,
            "tag_n_upsert_time_ms": processing_time,
            "confidence_reevaluation_time_ms": confidence_reevaluation_time,
            "understanding_hypotheses_count": len(understanding_decisions),
            "future_hypotheses_count": len(future_decisions)
        }
    }


# Helper to extract a serializable RSA analysis payload from MessageGenerationResult
def _extract_rsa_analysis_from_mg_result(result: Any) -> Dict[str, Any]:
    analysis: Dict[str, Any] = {}
    try:
        # Best candidate text
        best_text = None
        best_obj = getattr(result, 'best_candidate', None)
        if best_obj is not None:
            best_text = getattr(best_obj, 'text', None)
            if best_text is None and isinstance(best_obj, dict):
                best_text = best_obj.get('text')

        # Ranked candidates
        ranked_candidates: List[Dict[str, Any]] = []
        for rc in (getattr(result, 'ranked_candidates', None) or []):
            try:
                rank = getattr(rc, 'rank', None)
                if rank is None and isinstance(rc, dict):
                    rank = rc.get('rank')
                ranking_score = getattr(rc, 'ranking_score', None)
                if ranking_score is None and isinstance(rc, dict):
                    ranking_score = rc.get('ranking_score')
                cand = getattr(rc, 'candidate', None)
                if cand is None and isinstance(rc, dict):
                    cand = rc.get('candidate')
                cand_text = None
                if cand is not None:
                    cand_text = getattr(cand, 'text', None)
                    if cand_text is None and isinstance(cand, dict):
                        cand_text = cand.get('text')
                ranked_candidates.append({
                    'rank': rank,
                    'ranking_score': ranking_score,
                    'text': cand_text,
                })
            except Exception:
                continue

        # Candidate reply pairs
        candidate_reply_pairs: List[Dict[str, Any]] = []
        analysis_data = getattr(result, 'analysis_data', None)
        if analysis_data is None and isinstance(result, dict):
            analysis_data = result.get('analysis_data')
        if analysis_data is not None:
            pairs = getattr(analysis_data, 'candidate_reply_pairs', None)
            if pairs is None and isinstance(analysis_data, dict):
                pairs = analysis_data.get('candidate_reply_pairs')
            for p in (pairs or []):
                try:
                    idx = getattr(p, 'candidate_index', None)
                    if idx is None and isinstance(p, dict):
                        idx = p.get('candidate_index')
                    ctext = getattr(p, 'candidate_text', None)
                    if ctext is None and isinstance(p, dict):
                        ctext = p.get('candidate_text')
                    replies_out: List[Dict[str, Any]] = []
                    replies_in = getattr(p, 'replies', None)
                    if replies_in is None and isinstance(p, dict):
                        replies_in = p.get('replies')
                    for r in (replies_in or []):
                        rtext = getattr(r, 'text', None)
                        if rtext is None and isinstance(r, dict):
                            rtext = r.get('text')
                        rmeta = getattr(r, 'metadata', None)
                        if rmeta is None and isinstance(r, dict):
                            rmeta = r.get('metadata', {})
                        replies_out.append({'text': rtext, 'metadata': rmeta or {}})
                    candidate_reply_pairs.append({
                        'candidate_index': idx,
                        'candidate_text': ctext,
                        'replies': replies_out,
                    })
                except Exception:
                    continue

        analysis = {
            'best_candidate_text': best_text,
            'ranked_candidates': ranked_candidates,
            'candidate_reply_pairs': candidate_reply_pairs,
        }
    except Exception:
        analysis = {}
    return analysis


# Change: To pass party_b_desired_info downstream, extend function signature
def generate_mental_model_message(
    party_a: str,
    party_b: str,
    relationship: str,
    background_context: str,
    party_a_background: str,
    party_b_background: str,
    prev_convo: List[Dict],
    new_message: Message,
    hypotheses: List[Hypothesis],  # Now accepting Hypothesis objects directly
    agent: LLMBaseAgent,
    with_rsa: bool = False,
    with_committee: bool = False,
    party_b_desired_info: Optional[List[Dict[str, Any]]] = None,  # New addition
    # New: Separate future-oriented hypotheses (4-6) for A's generation and 1-3 dimensional hypotheses for analysis in RSA phase
    generation_hypotheses: Optional[List[Hypothesis]] = None,
    analysis_hypotheses: Optional[List[Hypothesis]] = None,
) -> Dict[str, Any]:
    """Generate Party A's next message from the current mental-model hypotheses.

    Three generation modes are supported:

    * ``with_rsa=False``: plain `generate_simple_message` with all hypotheses.
    * ``with_rsa=True, with_committee=False``: `select_best_message_with_rsa`.
    * ``with_rsa=True, with_committee=True``:
      `generate_messages_ranked_by_committee`.

    Args:
        prev_convo: Prior turns; only ``speaker`` and ``content`` are forwarded.
        new_message: Latest message, forwarded to the RSA/committee generators
            only (the simple generator does not receive it).
        hypotheses: All hypotheses, serialized with ``to_dict()`` before use.
        party_b_desired_info: Accepted for interface compatibility but
            intentionally NOT forwarded: the RSA/committee calls receive
            ``None`` and the simple Party A call omits it.
        generation_hypotheses: Hypotheses used for candidate generation in the
            RSA/committee modes (forwarded as ``None`` when empty).
        analysis_hypotheses: Hypotheses used for analysis in the RSA/committee
            modes (forwarded as ``None`` when empty).

    Returns:
        ``{'text': str, 'rsa_analysis': dict}``; ``rsa_analysis`` is empty in
        the simple mode.
    """
    # Convert conversation format for MessageGenerator
    prev_convo_for_generator = [
        {'speaker': msg['speaker'], 'content': msg['content']}
        for msg in prev_convo
    ]

    # MessageGenerator works with dicts, not Hypothesis objects
    hypotheses_dicts = [hyp.to_dict() for hyp in hypotheses] if hypotheses else []
    generation_hypotheses_dicts = [h.to_dict() for h in (generation_hypotheses or [])]
    analysis_hypotheses_dicts = [h.to_dict() for h in (analysis_hypotheses or [])]

    if not with_rsa:
        # Simple generation with the full hypothesis set (Party A doesn't need desired_info)
        text = generate_simple_message(
            party_a=party_a,
            party_b=party_b,
            relationship=relationship,
            current_speaker='Party A',
            other_speaker='Party B',
            background_context=background_context,
            prev_convo=prev_convo_for_generator,
            agent=agent,
            hypotheses=hypotheses_dicts,  # Simple message generator expects dicts
            party_a_background=party_a_background,
            party_b_background=party_b_background,
            is_party_b=False,
        )
        return {'text': text, 'rsa_analysis': {}}

    # Both ranking back-ends take the same keyword arguments; only the callee differs.
    rank_candidates = (
        generate_messages_ranked_by_committee if with_committee
        else select_best_message_with_rsa
    )
    result = rank_candidates(
        party_a=party_a,
        party_b=party_b,
        relationship=relationship,
        background_context=background_context,
        prev_convo=prev_convo_for_generator,
        new_message=new_message,
        party_a_background=party_a_background,
        party_b_background=party_b_background,
        hypotheses=hypotheses_dicts,  # Compatibility parameter; generation/analysis below override
        agent=agent,
        # RSA phase should not use Party B's desired_info
        party_b_desired_info=None,
        # Dedicated hypothesis sets for generation vs. analysis; None when not supplied
        hypotheses_generation=generation_hypotheses_dicts if generation_hypotheses_dicts else None,
        analysis_hypotheses=analysis_hypotheses_dicts if analysis_hypotheses_dicts else None,
    )
    rsa_analysis = _extract_rsa_analysis_from_mg_result(result)

    # The best candidate may be an object with `.text`, a dict, or something
    # else entirely; fall back to its string form in the last case.
    best_candidate = result.best_candidate
    if hasattr(best_candidate, 'text'):
        best_text = best_candidate.text
    elif isinstance(best_candidate, dict) and 'text' in best_candidate:
        best_text = best_candidate['text']
    else:
        best_text = str(best_candidate)
    return {'text': best_text, 'rsa_analysis': rsa_analysis}

# NEW: expand 4-6D hypotheses with neighbors (if store supports it)
def expand_future_hyps_with_neighbors(hyp_faiss: FaissStore, hyps: List[Hypothesis], top_k: int = 3) -> List[Hypothesis]:
    """
    Return `hyps` followed by any near neighbors the store can find for them.

    Neighbors are looked up with `hyp_faiss.search_by_text(dimension_id, text, top_k)`
    when the store offers that method; without it the result is just a copy of
    `hyps`. A hypothesis is added at most once, keyed by `hypothesis_id`, and a
    failing lookup for one hypothesis is ignored so the rest still get expanded.
    """
    expanded: List[Hypothesis] = list(hyps)
    known_ids = {item.hypothesis_id for item in expanded}
    for seed in hyps:
        if not hasattr(hyp_faiss, "search_by_text"):
            continue
        try:
            found = hyp_faiss.search_by_text(
                dimension_id=seed.dimension_id,
                text=seed.description,
                top_k=top_k,
            )
            for candidate in found or []:
                candidate_id = getattr(candidate, "hypothesis_id", None)
                if not candidate_id or candidate_id in known_ids:
                    continue
                expanded.append(candidate)
                known_ids.add(candidate_id)
        except Exception:
            continue
    return expanded


def run_conversation(
    scenario_data: Dict,
    max_turns: int = 10,
    with_rsa: bool = False,
    with_committee: bool = False,
    message_agent: Optional[LLMBaseAgent] = None,  # Agent used for message generation (both parties)
    hypothesis_agent: Optional[LLMBaseAgent] = None  # Agent used for hypothesis processing
) -> ConversationResult:
    """
    Run a complete conversation between Party A (mental model) and Party B (simple LLM).

    The conversation is seeded from `scenario_data['conversation']` (when it is a
    list): messages are copied as Party A messages until the first message whose
    speaker contains "Party B", which is copied as well and ends the seeding.

    Each of the `max_turns` iterations produces exactly one message:
      * if the history is empty or its last message is from Party A, Party B
        replies via `generate_party_b_reply`;
      * otherwise Party A replies via `generate_mental_model_message`. Before
        generating, hypotheses are updated from Party B's last message, but only
        when there is earlier conversation, at least one Party B turn has been
        generated in this run, and `turn_num >= 2`.

    A fresh `FaissStore` / evidence map is created per call, so scenarios do not
    share hypothesis state.

    Args:
        scenario_data: Scenario record; must contain 'scenario_id' and whatever
            `extract_scenario_fields` reads.
        max_turns: Number of messages to generate (both parties combined).
        with_rsa: Forwarded to `generate_mental_model_message`.
        with_committee: Forwarded to `generate_mental_model_message`.
        message_agent: Agent passed to both message generators.
        hypothesis_agent: Agent passed to the hypothesis-processing helpers.

    Returns:
        A `ConversationResult` holding every generated turn, the final
        hypotheses (as dicts) and a small summary.
    """
    all_dims = [1, 2, 3, 4, 5, 6]

    # Extract scenario information
    scenario_id = scenario_data['scenario_id']
    fields = extract_scenario_fields(scenario_data)
    party_a = fields['party_a']
    party_b = fields['party_b']
    relationship = fields['relationship']
    background_context = fields['background_context']
    aspect_sensitivities = fields['aspect_sensitivities']
    # party_b_desired_info drives B's inquiries and is forwarded to A's generator
    party_b_desired_info = fields.get('party_b_desired_info', [])

    # Initialize conversation state
    conversation_history = []
    turns = []

    # Create fresh Mental Model instances for this scenario
    scenario_hyp_faiss = FaissStore()
    scenario_hyp2evi = defaultdict(list)

    # Seed the history: everything up to and including the first Party B message
    if 'conversation' in scenario_data and isinstance(scenario_data['conversation'], list):
        for msg in scenario_data['conversation']:
            is_party_b = "Party B" in msg.get('speaker', '')
            conversation_history.append({
                'speaker': 'Party B' if is_party_b else 'Party A',
                'content': msg.get('content', '')
            })
            if is_party_b:
                break

    # Run conversation turns (the progress bar counts turns, not scenarios)
    turn_progress = tqdm(range(max_turns), desc=f"Scenario {scenario_id} turns", unit="turn")
    for turn_num in turn_progress:
        if not conversation_history or "Party A" in conversation_history[-1]['speaker']:
            # Party B's turn
            last_party_a_msg = conversation_history[-1]['content'] if conversation_history else ""
            party_b_reply = generate_party_b_reply(
                party_a=party_a,
                party_b=party_b,
                relationship=relationship,
                background_context=background_context,
                conversation_history=conversation_history[:-1] if conversation_history else [],
                party_a_message=last_party_a_msg,
                agent=message_agent,  # Use the dedicated message generation agent
                party_a_background=fields['party_a_background'],
                party_b_background=fields['party_b_background'],
                party_b_desired_info=party_b_desired_info,
            )

            conversation_history.append({
                'speaker': 'Party B',
                'content': party_b_reply
            })

            # Record Party B's turn. No mental-model processing happens for the
            # simple LLM, so every detail/timing field keeps its ConversationTurn
            # default (empty list / 0.0 / empty dict).
            turns.append(ConversationTurn(
                turn_number=turn_num + 1,
                speaker='Party B',
                message=party_b_reply,
            ))
            continue

        # Party A's turn with mental model
        last_message = conversation_history[-1]

        # Take snapshot of current hypotheses (before any update this turn)
        current_hypotheses = []
        for dim in all_dims:
            current_hypotheses.extend(h.to_dict() for h in scenario_hyp_faiss.list_all_one(dim))

        # Decide whether to run hypothesis processing now.
        # The conversation BEFORE the last message is used as context.
        earlier_convo = conversation_history[:-1]
        has_party_b_turn = any(t.speaker == 'Party B' for t in turns)
        reach_min_turn_for_hyp = (turn_num >= 2)  # Keep mental model activation after third message

        understanding_hypotheses = []
        future_hypotheses = []
        hyp_processing_time = 0.0

        if earlier_convo and has_party_b_turn and reach_min_turn_for_hyp:
            # Process hypotheses with detailed recording.
            # perf_counter is monotonic, so the duration cannot go negative on clock adjustments.
            hyp_start_time = time.perf_counter()

            hypothesis_processing = process_hypothesis_updates_detailed(
                party_a=party_a,
                party_b=party_b,
                relationship=relationship,
                background_context=background_context,
                earlier_conversation=earlier_convo,
                last_message=last_message,
                hyp_faiss=scenario_hyp_faiss,
                hyp2evi=scenario_hyp2evi,
                agent=hypothesis_agent
            )

            # NOTE: this timing covers the update step only; the future-forward
            # generation below is not included.
            hyp_processing_time = (time.perf_counter() - hyp_start_time) * 1000
            updated_hypotheses = hypothesis_processing["all_current_hypotheses"]

            # Get understanding hypotheses (dimensions 1-3) from current state
            for dim in [1, 2, 3]:
                understanding_hypotheses.extend(scenario_hyp_faiss.list_all_one(dim))

            # Generate fresh future hypotheses (dimensions 4-6).
            # process_future_forward_hypotheses reads module-level state, so the
            # scenario-local stores are swapped in and always restored afterwards.
            original_hypfaiss = MentalModel.HypFaiss
            original_hyp2evi = MentalModel.Hyp2Evi
            MentalModel.HypFaiss = scenario_hyp_faiss
            MentalModel.Hyp2Evi = scenario_hyp2evi
            try:
                future_results = process_future_forward_hypotheses(party_a, party_b, relationship, background_context, hypothesis_agent)
                future_hypotheses = [hyp for _, hyp, _ in future_results]
            finally:
                MentalModel.HypFaiss = original_hypfaiss
                MentalModel.Hyp2Evi = original_hyp2evi
        else:
            # Skip hypothesis processing for now
            updated_hypotheses = current_hypotheses
            # Provide empty placeholders so later references are safe
            hypothesis_processing = {
                "tagged_chunks": [],
                "chunk_processing_details": [],
                "hypothesis_decisions": [],
                "candidate_hypotheses_searched": [],
                "confidence_evaluations": [],
                "future_forward_processing": [],
                "hypothesis_evidence": [],
                "all_current_hypotheses": current_hypotheses,
            }

        # Combine all hypotheses for message generation; future hypotheses are
        # expanded with neighbors if the store supports it.
        future_hypotheses_expanded = expand_future_hyps_with_neighbors(scenario_hyp_faiss, future_hypotheses, top_k=3)
        all_hypotheses = understanding_hypotheses + future_hypotheses_expanded

        # Generate message using mental model (with or without RSA)
        msg_gen_start = time.perf_counter()

        # Convert conversation history to the format expected by the generator
        prev_convo_for_generator = [
            {'speaker': msg['speaker'], 'content': msg['content']}
            for msg in earlier_convo
        ]
        new_message_for_generator = Message(text=last_message['content'], sender=last_message['speaker'])

        generated_payload = generate_mental_model_message(
            party_a=party_a,
            party_b=party_b,
            relationship=relationship,
            background_context=background_context,
            party_a_background=fields['party_a_background'],
            party_b_background=fields['party_b_background'],
            prev_convo=prev_convo_for_generator,
            new_message=new_message_for_generator,
            hypotheses=all_hypotheses,  # Hypothesis objects are passed directly
            agent=message_agent,  # Use the dedicated message generation agent
            with_rsa=with_rsa,
            with_committee=with_committee,
            party_b_desired_info=party_b_desired_info,  # Only used for non-RSA cases
            # RSA: Candidate generation uses all dimensional hypotheses (1-6), evaluation analysis still uses 1-3 dimensions
            generation_hypotheses=all_hypotheses,
            analysis_hypotheses=understanding_hypotheses,
        )

        msg_gen_time = (time.perf_counter() - msg_gen_start) * 1000
        if isinstance(generated_payload, dict):
            generated_message = generated_payload.get('text')
            rsa_analysis_payload = generated_payload.get('rsa_analysis', {})
        else:
            generated_message = str(generated_payload)
            rsa_analysis_payload = {}

        # Add Party A's message to conversation
        conversation_history.append({
            'speaker': 'Party A',
            'content': generated_message
        })

        # Record this turn with detailed information
        turns.append(ConversationTurn(
            turn_number=turn_num + 1,
            speaker='Party A',
            message=generated_message,
            hypotheses_snapshot=current_hypotheses,
            updated_hypotheses=updated_hypotheses,

            # Detailed hypothesis processing information
            tagged_chunks=hypothesis_processing["tagged_chunks"],
            chunk_processing_details=hypothesis_processing["chunk_processing_details"],
            hypothesis_decisions=hypothesis_processing["hypothesis_decisions"],
            candidate_hypotheses_searched=hypothesis_processing["candidate_hypotheses_searched"],
            confidence_evaluations=hypothesis_processing["confidence_evaluations"],
            future_forward_processing=hypothesis_processing["future_forward_processing"],
            hypothesis_evidence=hypothesis_processing["hypothesis_evidence"],

            # Timing and performance info
            processing_time_ms=hyp_processing_time + msg_gen_time,
            hypothesis_update_time_ms=hyp_processing_time,
            message_generation_time_ms=msg_gen_time,
            rsa_analysis=rsa_analysis_payload
        ))

    # Get final hypotheses state
    final_hypotheses = []
    for dim in all_dims:
        final_hypotheses.extend(h.to_dict() for h in scenario_hyp_faiss.list_all_one(dim))

    # Create conversation summary
    summary = {
        'total_turns': len(turns),
        'party_a_turns': len([t for t in turns if t.speaker == 'Party A']),
        'party_b_turns': len([t for t in turns if t.speaker == 'Party B']),
        'total_hypotheses': len(final_hypotheses),
        'hypotheses_by_dimension': {dim: len([h for h in final_hypotheses if h['dimension_id'] == dim]) for dim in all_dims}
    }

    result = ConversationResult(
        scenario_id=scenario_id,
        party_a=party_a,
        party_b=party_b,
        relationship=relationship,
        background_context=background_context,
        aspect_sensitivities=aspect_sensitivities,
        turns=turns,
        final_hypotheses=final_hypotheses,
        conversation_summary=summary
    )

    # Also return result so caller can write back to input file
    return result


def save_result_jsonl(result: ConversationResult, output_file: str):
    """Append a single conversation result to a JSONL file as one line.

    `result` is converted to a plain dict first:
      * objects exposing `model_dump()` use it;
      * plain dicts are used as-is;
      * objects exposing `json()` are round-tripped through that JSON string;
      * anything else is stored as `{"data": str(result)}`.
    If the conversion itself raises, a generic `__dict__`-based JSON round-trip
    is attempted, and finally the `str()` fallback.

    The record is serialized completely *before* the file is opened, so a
    serialization error (e.g. a dict holding a non-JSON-serializable value)
    propagates to the caller without leaving a truncated line in the file.

    Args:
        result: The conversation result (model object or dict) to store.
        output_file: Path of the JSONL file; opened in append mode (UTF-8).

    Raises:
        TypeError / ValueError: if the converted dict cannot be JSON-encoded.
        OSError: if the file cannot be opened or written.
    """
    # Support both Pydantic models and plain dicts
    try:
        if hasattr(result, 'model_dump'):
            result_dict = result.model_dump()
        elif isinstance(result, dict):
            result_dict = result
        elif hasattr(result, 'json'):
            result_dict = json.loads(result.json())
        else:
            result_dict = {"data": str(result)}
    except Exception:
        try:
            result_dict = json.loads(json.dumps(result, default=lambda o: getattr(o, "__dict__", str(o))))
        except Exception:
            result_dict = {"data": str(result)}

    # Encode first: json.dump() streams chunks into the file, so a failure midway
    # would leave a partial line and corrupt the JSONL for every later reader.
    line = json.dumps(result_dict, ensure_ascii=False)

    with open(output_file, 'a', encoding='utf-8') as f:
        f.write(line + '\n')  # JSONL format: one JSON object per line

def save_results(results: List[ConversationResult], output_file: str):
    """Rewrite `output_file` as JSONL holding exactly `results` (kept for backward compatibility).

    The file is truncated (or created) first; each result is then appended as
    its own line through `save_result_jsonl`.
    """
    # Opening in 'w' mode and closing immediately empties/creates the file.
    open(output_file, 'w', encoding='utf-8').close()

    for item in results:
        save_result_jsonl(item, output_file)


def _analysis_output_paths(base_filename: str):
    """Derive the (analysis, summary, raw-data) output paths next to `base_filename`.

    Only a trailing `.json` / `.jsonl` extension is stripped. Any other name is
    used as the stem unchanged, so the derived paths are always distinct from
    `base_filename` and from each other.
    """
    stem, ext = os.path.splitext(base_filename)
    if ext not in ('.json', '.jsonl'):
        stem = base_filename
    return (
        f"{stem}_detailed_analysis.json",
        f"{stem}_summary_report.txt",
        f"{stem}_raw_processing_data.json",
    )


def save_detailed_analysis(results: List[ConversationResult], base_filename: str):
    """Save detailed analysis including hypothesis evolution and decision patterns.

    Three files are written next to `base_filename` (its trailing `.json` or
    `.jsonl` extension, if any, is replaced by the suffixes below):
      * `<stem>_detailed_analysis.json` - aggregate summary plus, per scenario,
        the turn-by-turn flow, per-dimension hypothesis evolution and the
        new/merge decision statistics;
      * `<stem>_summary_report.txt` - human-readable report of the same data;
      * `<stem>_raw_processing_data.json` - written by `save_raw_processing_data`.

    Args:
        results: Conversation results to analyse.
        base_filename: Path of the main results file the outputs are named after.

    Returns:
        Tuple `(analysis_file, summary_file, raw_data_file)` of the written paths.
    """
    dims = [1, 2, 3, 4, 5, 6]

    def _share(part, whole):
        # Percentage of `whole` taken by `part`; 0 when nothing was measured.
        return (part / whole * 100) if whole else 0

    # Create detailed analysis structure
    total_hypotheses = sum(r.conversation_summary['total_hypotheses'] for r in results)
    analysis = {
        "summary": {
            "total_scenarios": len(results),
            "successful_scenarios": len([r for r in results if r.turns]),
            "total_turns": sum(len(r.turns) for r in results),
            "total_party_a_turns": sum(r.conversation_summary['party_a_turns'] for r in results),
            "total_hypotheses_generated": total_hypotheses,
            "avg_hypotheses_per_scenario": total_hypotheses / max(1, len(results))
        },
        "scenarios": []
    }

    # Process each scenario
    for result in results:
        scenario_analysis = {
            "scenario_id": result.scenario_id,
            "party_a": result.party_a,
            "party_b": result.party_b,
            "relationship": result.relationship,
            "background_context": result.background_context,
            "conversation_flow": [],
            "hypothesis_evolution": {},
            "decision_patterns": {}
        }
        evolution = scenario_analysis["hypothesis_evolution"]

        # Track hypothesis evolution by dimension
        for dim in dims:
            evolution[f"dimension_{dim}"] = {
                "initial_count": 0,
                "final_count": len([h for h in result.final_hypotheses if h['dimension_id'] == dim]),
                "evolution_steps": []
            }

        # Process each turn
        decision_counts = {"new": 0, "merge": 0}

        for turn in result.turns:
            turn_info = {
                "turn_number": turn.turn_number,
                "speaker": turn.speaker,
                "message_preview": turn.message,
                "hypothesis_changes": len(turn.hypothesis_decisions),

                # Detailed processing information
                "chunks_tagged": len(turn.tagged_chunks),
                "dimensions_processed": list(set(chunk["dimension"] for chunk in turn.tagged_chunks)),
                "candidate_hypotheses_found": 0,  # conservative; pipeline generates candidates internally
                "confidence_evaluations_count": len(turn.confidence_evaluations),
                "future_hypotheses_processed": len(turn.future_forward_processing),

                # Timing information
                "processing_time_ms": turn.processing_time_ms,
                "hypothesis_update_time_ms": turn.hypothesis_update_time_ms,
                "message_generation_time_ms": turn.message_generation_time_ms,

                # Decision breakdown
                "new_hypotheses": sum(1 for d in turn.hypothesis_decisions if d.get("decision") == "new"),
                "merged_hypotheses": sum(1 for d in turn.hypothesis_decisions if d.get("decision") == "merge")
            }

            # Analyze hypothesis decisions
            for decision in turn.hypothesis_decisions:
                if not (isinstance(decision, dict) and 'decision' in decision):
                    continue
                kind = decision['decision']
                decision_counts[kind] = decision_counts.get(kind, 0) + 1

                # Track hypothesis evolution. Only decisions carrying a
                # 'dimension_id' that maps to a tracked dimension are recorded.
                if 'dimension_id' in decision:
                    dim_key = f"dimension_{decision['dimension_id']}"
                    if dim_key in evolution:
                        evolution[dim_key]["evolution_steps"].append({
                            "turn": turn.turn_number,
                            "decision": kind,
                            "description": decision.get('new_hypothesis_desc', decision.get('updated_hypothesis_desc', 'N/A'))
                        })

            scenario_analysis["conversation_flow"].append(turn_info)

        # Compile decision patterns
        total_decisions = sum(decision_counts.values())
        scenario_analysis["decision_patterns"] = {
            "decision_counts": decision_counts,
            "total_decisions": total_decisions,
            "merge_ratio": (decision_counts["merge"] / total_decisions) if total_decisions else 0.0,
            "new_hypothesis_ratio": (decision_counts["new"] / total_decisions) if total_decisions else 0.0
        }

        analysis["scenarios"].append(scenario_analysis)

    # Derive output paths by stripping only a trailing .json/.jsonl extension.
    # str.replace() would also rewrite matches inside directory names and, for a
    # name without such an extension, make all outputs collide with the input.
    analysis_file, summary_file, raw_data_file = _analysis_output_paths(base_filename)

    with open(analysis_file, 'w', encoding='utf-8') as f:
        json.dump(analysis, f, indent=2, ensure_ascii=False)

    # Save summary report
    with open(summary_file, 'w', encoding='utf-8') as f:
        f.write("=== MENTAL MODEL CONVERSATION ANALYSIS REPORT ===\n\n")
        f.write(f"Total Scenarios Processed: {analysis['summary']['total_scenarios']}\n")
        f.write(f"Successful Scenarios: {analysis['summary']['successful_scenarios']}\n")
        f.write(f"Total Conversation Turns: {analysis['summary']['total_turns']}\n")
        f.write(f"Total Party A Turns: {analysis['summary']['total_party_a_turns']}\n")
        f.write(f"Total Hypotheses Generated: {analysis['summary']['total_hypotheses_generated']}\n")
        f.write(f"Average Hypotheses per Scenario: {analysis['summary']['avg_hypotheses_per_scenario']:.2f}\n\n")

        f.write("=== PER-SCENARIO SUMMARY ===\n")
        for scenario in analysis["scenarios"]:
            flow = scenario["conversation_flow"]
            f.write(f"\nScenario {scenario['scenario_id']}:\n")
            f.write(f"  Party A: {scenario['party_a']}\n")
            f.write(f"  Party B: {scenario['party_b']}\n")
            f.write(f"  Relationship: {scenario['relationship']}\n")
            f.write(f"  Turns: {len(flow)}\n")
            f.write(f"  New/Merge Ratio: {scenario['decision_patterns']['new_hypothesis_ratio']:.2f} / {scenario['decision_patterns']['merge_ratio']:.2f}\n")

            # Processing time analysis
            total_processing_time = sum(turn_info["processing_time_ms"] for turn_info in flow)
            total_hyp_time = sum(turn_info["hypothesis_update_time_ms"] for turn_info in flow)
            total_msg_time = sum(turn_info["message_generation_time_ms"] for turn_info in flow)

            # Shares are relative to the real total (no clamping to 1 ms, which
            # would understate the percentages for sub-millisecond totals).
            f.write(f"  Total Processing Time: {total_processing_time:.1f}ms\n")
            f.write(f"  Hypothesis Update Time: {total_hyp_time:.1f}ms ({_share(total_hyp_time, total_processing_time):.1f}%)\n")
            f.write(f"  Message Generation Time: {total_msg_time:.1f}ms ({_share(total_msg_time, total_processing_time):.1f}%)\n")

            # Chunk processing analysis
            total_chunks = sum(turn_info["chunks_tagged"] for turn_info in flow)
            dimensions_used = set()
            for turn_info in flow:
                dimensions_used.update(turn_info.get("dimensions_processed", []))

            f.write(f"  Total Chunks Processed: {total_chunks}\n")
            f.write(f"  Dimensions Used: {sorted(dimensions_used)}\n")

            # Hypothesis evolution summary
            f.write("  Final Hypotheses by Dimension:\n")
            for dim_key, dim_data in scenario['hypothesis_evolution'].items():
                f.write(f"    {dim_key}: {dim_data['final_count']} hypotheses\n")

    # Save raw step-by-step data for deep analysis (file path already determined above)
    save_raw_processing_data(results, raw_data_file)

    return analysis_file, summary_file, raw_data_file


def save_raw_processing_data(results: List[ConversationResult], output_file: str):
    """Dump the raw, step-by-step processing data of every scenario to one JSON file.

    Party A turns are written with their full processing details, timing and
    before/after hypothesis snapshots; Party B turns only carry turn number,
    speaker and message. Each scenario additionally records its final
    hypotheses and conversation summary. The file is written as indented
    UTF-8 JSON with non-ASCII characters kept as-is.
    """

    def _turn_record(turn):
        # Fields shared by both speakers.
        record = {
            "turn_number": turn.turn_number,
            "speaker": turn.speaker,
            "message": turn.message,
        }
        if turn.speaker != 'Party A':
            # Party B turns - minimal info
            return record

        # Only Party A turns carry detailed processing information.
        record["processing_details"] = {
            "tagged_chunks": turn.tagged_chunks,
            "chunk_processing": turn.chunk_processing_details,
            "candidate_hypotheses_searched": turn.candidate_hypotheses_searched,
            "hypothesis_decisions": turn.hypothesis_decisions,
            "confidence_evaluations": turn.confidence_evaluations,
            "future_forward_processing": turn.future_forward_processing,
            "hypothesis_evidence": turn.hypothesis_evidence,
            "rsa_analysis": turn.rsa_analysis,
            "timing": {
                "total_time_ms": turn.processing_time_ms,
                "hypothesis_update_time_ms": turn.hypothesis_update_time_ms,
                "message_generation_time_ms": turn.message_generation_time_ms,
            },
        }
        record["hypotheses_before"] = turn.hypotheses_snapshot
        record["hypotheses_after"] = turn.updated_hypotheses
        return record

    metadata = {
        "total_scenarios": len(results),
        "generated_at": time.time(),
        "processing_version": "detailed_v1",
    }

    scenarios = []
    for res in results:
        entry = {
            "scenario_id": res.scenario_id,
            "party_a": res.party_a,
            "party_b": res.party_b,
            "relationship": res.relationship,
            "background_context": res.background_context,
            "turns": [_turn_record(t) for t in res.turns],
        }
        # Final state goes after the turns so the key order in the file is stable.
        entry["final_hypotheses"] = res.final_hypotheses
        entry["conversation_summary"] = res.conversation_summary
        scenarios.append(entry)

    payload = {"metadata": metadata, "scenarios": scenarios}

    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(payload, f, indent=2, ensure_ascii=False)


def _build_arg_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the conversation pipeline.

    ``--data-file`` and ``--results-dir`` are required; every other option
    has a default.
    """
    parser = argparse.ArgumentParser(description='Run mental model conversation pipeline')
    parser.add_argument('--model', type=str, default='gpt-4o',
                        help='Model to use for LLM generation (default: gpt-4o)')
    parser.add_argument('--temperature', type=float, default=1.0,
                        help='Temperature for LLM generation (default: 1.0)')
    parser.add_argument('--max-scenarios', type=int, default=100,
                        help='Maximum number of scenarios to process (default: 100)')
    parser.add_argument('--max-turns', type=int, default=20,
                        help='Maximum number of conversation turns (default: 20)')
    parser.add_argument('--data-file', type=str, required=True,
                        help='Path to input data file')
    parser.add_argument('--results-dir', type=str, required=True,
                        help='Base directory for results')
    parser.add_argument('--with-rsa', action='store_true',
                        help='Use RSA-based message generation for Party A (default: False)')
    parser.add_argument('--with-committee', action='store_true',
                        help='Use committee-based message generation with RSA for Party A (default: False)')
    return parser


def _print_pipeline_summary(args: argparse.Namespace, results: list, attempted_count: int,
                            output_file: str, analysis_file, summary_file, raw_data_file) -> None:
    """Print the end-of-run report for the pipeline.

    Args:
        args: Parsed command-line arguments (model and RSA/committee flags are read).
        results: Conversation results collected during this run; each item's
            ``conversation_summary['total_hypotheses']`` is read.
        attempted_count: Number of scenarios selected for this run (after the
            ``--max-scenarios`` cap), used as the denominator of the success count.
        output_file: Path of the main JSONL results file.
        analysis_file: Path returned by ``save_detailed_analysis`` for the analysis.
        summary_file: Path returned by ``save_detailed_analysis`` for the summary.
        raw_data_file: Path returned by ``save_detailed_analysis`` for the raw data.
    """
    banner = '=' * 60
    # Computed once; it feeds both the total and the per-scenario average.
    total_hypotheses = sum(r.conversation_summary['total_hypotheses'] for r in results)

    print(f"\n{banner}")
    print("🎉 PIPELINE COMPLETE")
    print(f"Model used: {args.model}")
    print(f"With RSA: {args.with_rsa}")
    print(f"With Committee: {args.with_committee}")
    print(f"Processed {len(results)}/{attempted_count} scenarios successfully")
    print("\n📈 Mental Model Isolation Summary:")
    print(f"  • Each of the {len(results)} scenarios used an independent mental model")
    print(f"  • Total hypotheses across all scenarios: {total_hypotheses}")
    # max(1, ...) avoids a ZeroDivisionError when no scenario succeeded.
    print(f"  • Average hypotheses per scenario: {total_hypotheses / max(1, len(results)):.1f}")
    print("  • No cross-contamination between scenarios ✓")
    print("\n🧠 Mental Model Structure & Message Generation:")
    print("  • Following 6-dimensional Mental Model framework:")
    print("    - Dimensions 1-3: Understanding (Knowledge, Behavior, Trust/Motive)")
    print("    - Dimension 4: Strategic Direction & Policy")
    print("    - Dimension 5: Information Gaps & Next Steps")
    print("    - Dimension 6: Privacy/Sensitivity Assessment")
    print("  • Using separate agents for:")
    print("    - Hypothesis processing: MentalModel.agent (update/evaluate hypotheses)")
    print("    - Message generation: message_agent (read hypotheses & generate messages)")
    print("  • Using MentalModel functions for robust processing ✓")
    print("    - tag_n_upsert: candidate→score→upsert for understanding (1-3) + future-forward (4-6)")
    print("    - process_future_forward_hypotheses: candidate→score→upsert (seeded by 1-3)")
    print("    - evaluate_confidence: committee-based confidence scoring")
    if args.with_rsa:
        if args.with_committee:
            print("  • Message generation: Committee-based voting with RSA and full hypothesis integration")
            print("  • Party A uses committee voting to select best message considering all 6 dimensions")
        else:
            print("  • Message generation: RSA-based with full hypothesis integration")
            print("  • Party A considers all 6 dimensions when generating responses")
    else:
        print("  • Message generation: Direct with full hypothesis integration")
        print("  • Party A considers all 6 dimensions when generating responses")
    print("  • Future (4–6) hypotheses are expanded with near neighbors when available ✓")
    print("\n📊 JSONL Format Benefits:")
    print("  • Each scenario written immediately upon completion")
    print("  • Progressive results available during long runs")
    print("  • Easy to resume/continue interrupted experiments")
    print("  • Compatible with streaming data processing tools")
    print("\n📁 Output Files:")
    print(f"  • Main Results (JSONL): {output_file}")
    print(f"  • Detailed Analysis: {analysis_file}")
    print(f"  • Summary Report: {summary_file}")
    print(f"  • Raw Processing Data: {raw_data_file}")
    print(banner)


def main() -> None:
    """Run the mental model conversation pipeline from the command line.

    Steps:
      1. Parse CLI arguments and create two ``LLMBaseAgent`` instances (one for
         message generation, one for hypothesis processing) from the same
         model/temperature settings.
      2. Load scenarios from ``--data-file`` and keep at most ``--max-scenarios``.
      3. For each selected scenario: skip it if its integer ``scenario_id`` is
         already present in the existing result files, skip it if
         ``extract_scenario_fields`` rejects it, otherwise call
         ``run_conversation`` and append the outcome to both JSONL files.
      4. Call ``save_detailed_analysis`` on the results collected in this run
         and print a final report.

    A failure inside one scenario is reported (message plus traceback) and the
    loop moves on to the next scenario.
    """
    # Local import: only needed for per-scenario error reporting below.
    import traceback

    args = _build_arg_parser().parse_args()

    # Initialize agent with specified parameters for message generation
    agent_config = {
        'model': args.model,
        'temperature': args.temperature
    }
    message_agent = LLMBaseAgent(agent_config)
    print(f"Initializing message generation agent with model: {args.model}")

    # Separate agent for hypothesis updating; currently configured identically,
    # but kept distinct so a different model could be used later.
    hypothesis_agent_config = {
        'model': args.model,
        'temperature': args.temperature
    }
    hypothesis_agent = LLMBaseAgent(hypothesis_agent_config)
    print(f"Initializing hypothesis processing agent with model: {args.model}")

    # Load data
    print(f"Loading data from {args.data_file}...")
    scenarios = load_data(args.data_file)
    print(f"Loaded {len(scenarios)} scenarios")

    # Prepare output files BEFORE starting the loop
    os.makedirs(args.results_dir, exist_ok=True)
    rsa_name = str(args.with_rsa).lower()
    committee_name = str(args.with_committee).lower()
    run_suffix = f"with_rsa_{rsa_name}_committee_{committee_name}.jsonl"
    output_file = os.path.join(
        args.results_dir, f"mental_model_conversations_{run_suffix}")
    conversation_with_meta_data_file = os.path.join(
        args.results_dir, f"mental_model_conversations_with_meta_data_{run_suffix}")
    # Do NOT clear existing results; allow resuming and skipping processed scenarios
    print(f"📝 Using JSONL output file: {output_file}")

    # Load already processed scenario ids from existing result files (if present)
    processed_ids = load_processed_ids([output_file, conversation_with_meta_data_file])

    # Only the capped slice is processed, so all progress counters below are
    # based on it rather than on the full list of loaded scenarios.
    selected_scenarios = scenarios[:args.max_scenarios]
    total_selected = len(selected_scenarios)

    results = []
    print(f"\n📊 Processing {total_selected} scenarios with independent mental models...")
    print("Each scenario will start with a completely fresh mental model state.")
    print("💾 Results will be written in JSONL format (one scenario per line) as they complete.\n")
    scenario_progress = tqdm(selected_scenarios, desc="Processing scenarios", unit="scenario")
    for position, scenario in enumerate(scenario_progress, start=1):
        sid_safe = scenario.get('scenario_id', scenario.get('id', 'unknown'))

        # Skip scenarios that already exist in the result file(s). Checked first
        # so a resumed run does not print a "FRESH START" banner for them.
        sid = scenario.get('scenario_id')
        if isinstance(sid, int) and sid in processed_ids:
            print(f"⏭️  Skipping scenario {sid}: already present in {output_file}")
            continue

        # Validate required fields early; skip if missing
        try:
            extract_scenario_fields(scenario)
        except Exception as ve:
            print(f"⏭️  Skipping scenario {sid_safe}: {ve}")
            continue

        print(f"\n{'='*60}")
        print(f"🚀 SCENARIO {sid_safe} ({position}/{total_selected}) - FRESH START")
        scenario_block = scenario.get('scenario', {}) or {}
        print(f"Party A: {scenario_block.get('party_a', '')}")
        print(f"Party B: {scenario_block.get('party_b', '')}...")
        print(f"Model: {args.model}, RSA: {args.with_rsa}")
        print(f"{'='*60}")

        try:
            result = run_conversation(
                scenario,
                max_turns=args.max_turns,
                with_rsa=args.with_rsa,
                with_committee=args.with_committee,
                message_agent=message_agent,  # dedicated message generation agent
                hypothesis_agent=hypothesis_agent  # dedicated hypothesis processing agent
            )
            results.append(result)
            # Attach the generated dialogue to the input scenario so the
            # "with meta data" file carries both the scenario and its conversation.
            scenario['final_conversation'] = [
                {"turn_number": t.turn_number, "speaker": t.speaker, "content": t.message}
                for t in result.turns
            ]
            # Immediately save this result to JSONL file
            save_result_jsonl(result, output_file)
            save_result_jsonl(scenario, conversation_with_meta_data_file)
        except Exception as e:
            # Best-effort batch run: report the failure with its traceback and
            # carry on with the remaining scenarios.
            print(f"❌ Error in scenario {sid_safe}: {str(e)}")
            traceback.print_exc()
            continue

    # Save detailed analysis (based on the results collected in this run)
    analysis_file, summary_file, raw_data_file = save_detailed_analysis(results, output_file)

    _print_pipeline_summary(
        args, results, total_selected, output_file,
        analysis_file, summary_file, raw_data_file,
    )


# Script entry point: run the pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
