import pandas as pd
from tqdm import tqdm  # progress bar utility
from utils.ECE import *
from utils.utils import *
import json
import os
from utils.conversation import conv_templates
import numpy as np

def _build_mmbench_prompt(row, conv_mode):
    """Build the full model prompt for one multiple-choice row.

    The question and its A-D options are formatted into a single user
    message, which is then wrapped in the conversation template selected by
    ``conv_mode`` with an empty assistant turn appended.

    Args:
        row: Dataset row supporting ``row['question']`` and ``row.get(...)``.
            Missing option columns are rendered as empty strings.
        conv_mode: Key into ``conv_templates``.

    Returns:
        The prompt produced by the template's ``get_prompt()``.
    """
    question_block = (
        f"{row['question']}\n"
        f"A: {row.get('A', '')}\n"
        f"B: {row.get('B', '')}\n"
        f"C: {row.get('C', '')}\n"
        f"D: {row.get('D', '')}\n"
        "Please answer directly with one option letter (A/B/C/D):\n"
    )
    conv = conv_templates[conv_mode].copy()
    conv.append_message(conv.roles[0], question_block)
    # A ``None`` message leaves the second role's turn open for generation.
    conv.append_message(conv.roles[1], None)
    return conv.get_prompt()


def _append_accuracy_report(report, report_path):
    """Append ``report`` as one CSV row, emitting a header for a new file.

    Args:
        report: Flat dict of column name -> value.
        report_path: Destination CSV path; parent directories are created.
    """
    os.makedirs(os.path.dirname(report_path), exist_ok=True)
    # Decide on the header BEFORE opening the file: open(..., 'a') creates it,
    # so an existence check made afterwards is always true and the header
    # would never be written.
    write_header = (
        not os.path.exists(report_path) or os.path.getsize(report_path) == 0
    )
    # newline='' lets the CSV writer control line endings (avoids doubled
    # carriage returns on Windows when writing to a text handle).
    with open(report_path, 'a', encoding='utf-8', newline='') as f:
        pd.DataFrame([report]).to_csv(f, header=write_header, index=False)
        # Force the row to disk so it survives an abrupt process exit.
        f.flush()
        os.fsync(f.fileno())


def test_model_multiple(model, args, repeat=1):
    """Evaluate ``model`` on sampled multiple-choice questions, ``repeat`` times.

    Each round draws ``args.num_test_samples`` rows from the parquet file at
    ``args.data_path`` (seeded with the round index), asks the model to pick
    one of the options A-D, and scores the pick against the row's ``answer``.
    The pick is the option whose token has the highest value in the last
    position of the model output restricted to the four option tokens.

    Side effects:
        * ``./result/ACC/MMBench/<model.name>.json`` is truncated, then one
          JSON object per evaluated sample is appended (one per line).
        * One summary row is appended to
          ``./result/ACC/MMBench/lora_acc.csv``; a failure to write it is
          printed rather than raised.

    Args:
        model: Object exposing ``name``, ``tokenizer``, ``generate_prompt``
            and ``get_answer``. ``get_answer`` returns either ``None`` (the
            sample is skipped and not counted) or a 3-tuple whose last
            element is indexed as ``[:, -1, :]``.
        args: Namespace providing ``dataset_name``, ``split``, ``data_path``,
            ``num_test_samples`` and ``conv_mode``.
        repeat: Number of sampling rounds.

    Returns:
        None. Results are printed and written to the files listed above.
    """
    dataset_name = args.dataset_name
    split = args.split
    accuracies = []
    # Initialised here so the final report is well-defined even if no round
    # runs (repeat == 0); inside the loop it holds the current round's count.
    count = 0

    acc_dir = "./result/ACC/MMBench"
    os.makedirs(acc_dir, exist_ok=True)
    out_json_path = os.path.join(acc_dir, f"{model.name}.json")

    # Start from an empty per-sample log; records are appended below.
    with open(out_json_path, "w", encoding="utf-8"):
        pass

    data_full = pd.read_parquet(args.data_path)
    print("args.num_test_samples", args.num_test_samples)
    print("len(data_full)", len(data_full))

    option_letters = ["A", "B", "C", "D"]
    # Token ids of the option letters do not depend on the sample, so they
    # are computed once, on the first sample that produces an output.
    option_ids = None

    for rep in range(repeat):
        print(f"\n🔁 Round {rep + 1} / {repeat} ...")

        # random_state=rep makes every round reproducible but distinct.
        data = data_full.sample(n=args.num_test_samples, random_state=rep).reset_index(drop=True)
        correct = 0
        count = 0

        for index, row in tqdm(data.iterrows(), desc=f"Testing model (rep {rep+1})", total=len(data)):
            prompt = _build_mmbench_prompt(row, args.conv_mode)

            answer = row.get('answer', '')
            image_bytes = row['image']["bytes"]

            model.generate_prompt(prompt)
            output = model.get_answer(image_bytes)
            if output is None:
                # No usable output: the sample is excluded from the score.
                continue
            raw_predicted, inputs, mo = output

            if option_ids is None:
                # Only the first token of each encoded letter is used.
                option_ids = [
                    model.tokenizer.encode(letter, add_special_tokens=False)[0]
                    for letter in option_letters
                ]

            # Assumes ``mo`` is (batch, seq, vocab) logits - TODO confirm.
            current_logits = mo[:, -1, :]
            option_logits = current_logits[:, option_ids]
            option_softmax = F.softmax(option_logits, dim=1)
            _, max_indices = torch.max(option_softmax, dim=1)
            decoded = [option_letters[i] for i in max_indices.cpu().numpy()]
            # Only the first batch element is scored.
            decoded_letter = decoded[0] if decoded else ""

            record = {
                "idx": int(index),
                "question": row.get("question", ""),
                "raw_out": str(raw_predicted),
                "pred": decoded_letter,
                "gt_answers": [str(answer)],
                "rep": rep + 1,
            }
            # Re-opened per record so each result is on disk even if a later
            # sample crashes the run.
            with open(out_json_path, "a", encoding="utf-8") as jf:
                jf.write(json.dumps(record, ensure_ascii=False) + "\n")

            is_correct = (decoded_letter == answer)
            correct += int(is_correct)
            count += 1

        if count == 0:
            # Every sample was skipped (or the sample was empty): there is no
            # accuracy to compute, and dividing would raise ZeroDivisionError.
            print(f"[⚠️ Warning] Round {rep + 1}: no samples were evaluated; skipping accuracy.")
            continue

        acc = correct / count
        print(f"✅ Round {rep + 1} accuracy: {acc:.4f}")
        accuracies.append(acc)

    if accuracies:
        mean_acc = np.mean(accuracies)
        std_acc = np.std(accuracies)
    else:
        # No round produced a score; report NaN instead of averaging nothing.
        mean_acc = std_acc = float("nan")

    report = {
        'model': model.name,
        'dataset': dataset_name,
        'split': split,
        'mean_accuracy': round(mean_acc, 4),
        'std_accuracy': round(std_acc, 4),
        # Number of samples scored in the last round.
        'sample_count': count,
        'repeats': repeat
    }

    report_path = './result/ACC/MMBench/lora_acc.csv'

    try:
        _append_accuracy_report(report, report_path)
        print(f"📄 Accuracy report saved to: {os.path.abspath(report_path)}")
        print(f"🧾 Sample records appended to: {out_json_path}")
    except Exception as e:
        # Best-effort: the scores were already printed, so a failed write is
        # reported rather than allowed to abort the caller.
        print(f"[❌ Error] Failed to save accuracy report: {e}")
