import json
import os
import random
from tqdm import tqdm
from config import LLM_MODELS, TTS_COMBINATIONS, VERIFIER_MODEL, CACHE_DIR
from reward_function import compute_eflops

# Configuration: both files live in CACHE_DIR (imported from config).
# EMBEDDINGS_FILE is the JSON input read by generate_mock_results(); only its
# top-level keys are used, as query IDs.
# RESULTS_FILE is the JSON output that generate_mock_results() writes.
EMBEDDINGS_FILE = os.path.join(CACHE_DIR, "query_embeddings_combined.json")
RESULTS_FILE = os.path.join(CACHE_DIR, "offline_results_mock.json")

def _simulate_sample(model, qp, cp, bs, prob_correct, rng):
    """Draw one mock sample for a single (model, qp, cp, bs) action.

    Args:
        model: Model identifier, passed to ``compute_eflops`` and embedded in
            the mock answer string.
        qp: First element of the TTS combination; forwarded to
            ``compute_eflops``.
        cp: Second element of the TTS combination; forwarded to
            ``compute_eflops``.
        bs: Third element of the TTS combination; only used in the mock
            answer string.
        prob_correct: Probability in [0, 1] that the sample is marked correct.
        rng: Source of randomness exposing ``randint``, ``random`` and
            ``uniform`` (the ``random`` module or a ``random.Random``).

    Returns:
        A dict with the keys ``accuracy``, ``verifier_score``, ``eflops``,
        ``token_len`` and ``extracted_answer``.
    """
    # The draw order (randint -> random -> uniform) is kept fixed so that a
    # given RNG state always yields the same sample.
    token_len = rng.randint(100, 500)
    is_correct = 1.0 if rng.random() < prob_correct else 0.0

    # Simulated verifier score: correct samples land in [0.7, 1.0] and
    # incorrect ones in [0.0, 0.6]. The ranges do not overlap, so the score
    # separates the two classes perfectly; only the position inside each
    # range is random.
    if is_correct:
        verifier_score = rng.uniform(0.7, 1.0)
    else:
        verifier_score = rng.uniform(0.0, 0.6)

    # The input length is fixed at 100 tokens; only the output length varies.
    eflops = compute_eflops(
        selected_model=model,
        L_in=[100],
        L_out=[token_len],
        qp=qp,
        cp=cp,
        verifier_model=VERIFIER_MODEL,
    )

    return {
        "accuracy": is_correct,
        "verifier_score": verifier_score,
        "eflops": eflops,
        "token_len": token_len,
        "extracted_answer": f"MockAnswer_{model}_{qp}_{cp}_{bs}_{is_correct}",
    }


def generate_mock_results(num_samples=3, seed=None):
    """Generate mock offline results for every query and save them as JSON.

    Query IDs are the top-level keys of ``EMBEDDINGS_FILE``. For each query,
    and for each action (every model in ``LLM_MODELS`` crossed with every
    ``(qp, cp, bs)`` tuple in ``TTS_COMBINATIONS``), ``num_samples`` random
    samples are simulated. The nested mapping
    ``{query_id: {action_key: [sample, ...]}}`` is written to
    ``RESULTS_FILE``.

    If ``EMBEDDINGS_FILE`` does not exist, an error message is printed and
    nothing is written.

    Args:
        num_samples: Number of samples to simulate per (query, action) pair.
            Must be at least 1. Defaults to 3.
        seed: Optional seed for reproducible output. When ``None`` (default)
            the global ``random`` module state is used, so a caller that has
            already called ``random.seed(...)`` keeps control of the stream.

    Returns:
        None.

    Raises:
        ValueError: If ``num_samples`` is less than 1.
    """
    if num_samples < 1:
        raise ValueError(f"num_samples must be >= 1, got {num_samples}")

    if not os.path.exists(EMBEDDINGS_FILE):
        print(f"Error: Embeddings file not found at {EMBEDDINGS_FILE}")
        return

    print("Loading query IDs from embeddings file...")
    with open(EMBEDDINGS_FILE, 'r', encoding='utf-8') as f:
        embeddings = json.load(f)

    query_ids = list(embeddings.keys())
    print(f"Found {len(query_ids)} queries.")

    # A dedicated generator keeps seeded runs isolated from the global state;
    # without a seed, fall back to the module-level functions.
    rng = random if seed is None else random.Random(seed)

    # Action space: each model combined with each (qp, cp, bs) setting.
    # The success probability depends only on the action, so compute it once
    # here rather than for every sample of every query.
    actions = []
    for model_idx, model in enumerate(LLM_MODELS):
        # Later models in LLM_MODELS get a higher base accuracy (+0.1 each).
        base_acc = 0.3 + (model_idx * 0.1)
        for qp, cp, bs in TTS_COMBINATIONS:
            # More parallelism adds up to +0.15, saturating at qp * cp == 64.
            parallel_boost = min(qp * cp, 64) / 64.0 * 0.15
            prob_correct = min(0.95, base_acc + parallel_boost)
            action_key = f"{model}+qp{qp}cp{cp}bs{bs}"
            actions.append((action_key, model, qp, cp, bs, prob_correct))

    results = {}
    for query_id in tqdm(query_ids, desc="Generating mock results"):
        results[query_id] = {
            action_key: [
                _simulate_sample(model, qp, cp, bs, prob_correct, rng)
                for _ in range(num_samples)
            ]
            for action_key, model, qp, cp, bs, prob_correct in actions
        }

    print(f"Saving results to {RESULTS_FILE}...")
    with open(RESULTS_FILE, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2)
    print("Done.")

# Script entry point: generate the mock results only when this file is run
# directly, not when it is imported as a module.
if __name__ == "__main__":
    generate_mock_results()
