import csv
import sys
import os
from io import StringIO
import argparse
import datetime
import re

def parse_set(s):
    """Parse a brace-delimited list of integers such as ``"{1, 2, 3}"``.

    Args:
        s: Text of the form ``{a, b, c}``. Surrounding whitespace, missing
            braces and empty items (e.g. a trailing comma) are tolerated.

    Returns:
        A ``set`` of ints; empty when ``s`` contains no items.

    Raises:
        ValueError: If a non-empty item is not a valid integer.
    """
    # Strip whitespace *before* the braces: str.strip('{}') stops at the first
    # character that is not a brace, so a leading/trailing blank would
    # otherwise leave the braces in place and break int() below.
    cleaned = s.strip().strip('{}').strip()
    if not cleaned:
        return set()
    # int() ignores surrounding whitespace itself; blank tokens are skipped so
    # "{1, 2,}" parses instead of failing on int('').
    return {int(token) for token in cleaned.split(',') if token.strip()}

def parse_question_param(param):
    """Extract the counts encoded in a parameter tag such as ``"mc-2_0.5k"``.

    Args:
        param: Tag of the form ``mc-<int>_<number>k`` where ``<number>`` is a
            count expressed in thousands.

    Returns:
        ``{'mc_count': int, 'q_count': int}``. Both values are 0 when the tag
        does not match the pattern or its number cannot be parsed.
    """
    match = re.match(r'mc-(\d+)_([\d.]+)k', param)
    if match:
        try:
            thousands = float(match.group(2))
        except ValueError:
            # "[\d.]+" also admits strings like "1.2.3" or "."; treat them as
            # unparseable, the same as a tag that does not match at all.
            thousands = None
        if thousands is not None:
            # round() rather than int(): binary floats make e.g. 1.001 * 1000
            # evaluate to 1000.9999999999999, which int() truncates to 1000.
            return {'mc_count': int(match.group(1)), 'q_count': round(thousands * 1000)}
    return {'mc_count': 0, 'q_count': 0}

def parse_candidate_path(candidate_path):
    """Recover the grading coordinates encoded in a candidate file path.

    The path is split on ``os.sep``. Its last five components must be
    ``<model>/<question_type>/<intent_space>/<question_param>/<filename>``,
    and the filename must be exactly the four directory names joined by
    underscores followed by ``_cand.txt``.

    Args:
        candidate_path: Path to a candidate answer file.

    Returns:
        ``(model, question_type, intent_space, question_param)`` when the path
        follows the layout, otherwise ``(None, None, None, None)``.
    """
    no_match = (None, None, None, None)

    segments = candidate_path.split(os.sep)
    if len(segments) < 5:
        return no_match

    filename = segments[-1]
    if not filename.endswith('_cand.txt'):
        return no_match

    # The four directories directly above the file carry the coordinates.
    model, question_type, intent_space, question_param = segments[-5:-1]
    required_name = "_".join((model, question_type, intent_space, question_param)) + "_cand.txt"
    if filename != required_name:
        return no_match

    return model, question_type, intent_space, question_param

def _load_answer_file(path):
    """Read an answer file into ``{question_id: set_of_action_ids}``.

    The first line is treated as a header and skipped, and blank lines are
    ignored. Every other line is split on its first comma only, because the
    set on the right-hand side contains commas itself. A later line with the
    same id overrides an earlier one.

    Raises:
        OSError: If the file cannot be opened or read.
        ValueError: If a line has no comma, or its id or set cannot be parsed.
    """
    answers = {}
    with open(path, 'r') as f:
        next(f, None)  # skip the header row (no-op on an empty file)
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                continue
            idx_text, comma, set_text = line.partition(',')
            if not comma:
                raise ValueError(f"malformed line (expected '<id>,<set>'): {line!r}")
            answers[int(idx_text.strip())] = parse_set(set_text.strip())
    return answers


def grade_questions(gt_file, cand_file):
    """Grade a candidate answer file against a ground-truth file.

    For every ground-truth question the score is the number of ground-truth
    actions that also appear in the candidate answer. Candidate actions
    outside the action space (integers 0..100) are listed as invalid and
    ignored; a question missing from the candidate scores 0.

    Args:
        gt_file: Path to the ground-truth file.
        cand_file: Path to the candidate file.

    Returns:
        ``(report_text, stats)`` where ``stats`` has the keys ``total_score``,
        ``total_full`` and ``errors``. If either file cannot be loaded, the
        report is a one-line error message and ``stats`` is all zeros with
        ``errors`` set to 1.
    """
    action_space = set(range(0, 101))  # Adjusted to include 0 based on sample data

    try:
        gt_data = _load_answer_file(gt_file)
    except (OSError, ValueError) as e:
        return f"Error loading ground truth: {str(e)}\n", {'total_score': 0, 'total_full': 0, 'errors': 1}

    try:
        cand_data = _load_answer_file(cand_file)
    except (OSError, ValueError) as e:
        return f"Error loading candidate: {str(e)}\n", {'total_score': 0, 'total_full': 0, 'errors': 1}

    output = StringIO()
    total_score = 0
    total_full = 0
    for qid in sorted(gt_data):
        gt = gt_data[qid]
        full = len(gt)
        total_full += full
        if qid not in cand_data:
            output.write(f"Question {qid}: Missing in candidate, score 0/{full}\n")
            continue
        cand = cand_data[qid]
        valid_cand = cand & action_space
        invalid = cand - valid_cand
        score = len(valid_cand & gt)
        output.write(f"Question {qid}: Score {score}/{full} (Invalid actions: {invalid})\n")
        total_score += score

    # The zero guard has to live outside the f-string: inside it, the
    # "if ... else" was plain text and the division raised ZeroDivisionError
    # whenever the ground truth had no scorable actions.
    ratio = total_score / total_full if total_full > 0 else 0.0
    output.write(f"\nOverall Score: {total_score}/{total_full} ({ratio:.2%})\n")
    return output.getvalue(), {'total_score': total_score, 'total_full': total_full, 'errors': 0}

def process_grading(model, question_type, intent_space, question_param, candidate_path):
    """Grade one candidate file, write its report and append a log entry.

    The ground truth is read from
    ``ground_truth/truth_<question_type>_<intent_space>_<question_param>.txt``.
    The report goes under
    ``reports/<model>/<question_type>/<intent_space>/<question_param>/``, and a
    one-line summary is appended to ``grader_log.txt``. All of these paths are
    relative to the current working directory.

    Args:
        model: Model name, used in the report path and log entry.
        question_type: Question type component of the file names.
        intent_space: Intent space component of the file names.
        question_param: Parameter tag (e.g. ``mc-2_0.5k``).
        candidate_path: Path of the candidate answer file to grade.
    """
    truth_name = f"truth_{question_type}_{intent_space}_{question_param}.txt"
    truth_path = os.path.join("ground_truth", truth_name)

    report_text, stats = grade_questions(truth_path, candidate_path)

    # Write the per-run report, creating its directory tree on first use.
    report_dir = f"reports/{model}/{question_type}/{intent_space}/{question_param}"
    os.makedirs(report_dir, exist_ok=True)
    report_name = f"{model}_{question_type}_{intent_space}_{question_param}_report.txt"
    report_path = os.path.join(report_dir, report_name)
    with open(report_path, 'w') as report_file:
        report_file.write(report_text)

    # Summarise the run as a single line in the shared log.
    counts = parse_question_param(question_param)
    if stats['total_full'] > 0:
        pass_rate = stats['total_score'] / stats['total_full'] * 100
    else:
        pass_rate = 0
    timestamp = datetime.datetime.now().isoformat()
    summary = (
        f"[{timestamp}] Model: {model}, Type: {question_type}, "
        f"Intent Space: {intent_space}, Param: {question_param}, "
        f"MC Count: {counts['mc_count']}, Questions: {counts['q_count']}, "
        f"Pass Rate: {pass_rate:.2f}%, Errors: {stats['errors']}"
    )
    with open("grader_log.txt", 'a') as log_file:
        log_file.write(summary + "\n")

    print(f"Report generated: {report_path}")

def main():
    """Command-line entry point for the grader.

    With no options, walks ``candidate_answer/`` and grades every
    ``*_cand.txt`` file whose path follows the expected layout, printing a
    warning if none was graded. With all four options, grades exactly that
    one candidate file.

    Raises:
        ValueError: If only some of the four options are given.
        FileNotFoundError: If the explicitly selected candidate file is absent.
    """
    cli = argparse.ArgumentParser(description="Grader for different models, types, intent_spaces, and params.")
    cli.add_argument('--model', default=None, help="e.g., qwen-plus-latest")
    cli.add_argument('--question_type', default=None, help="e.g., normal_single")
    cli.add_argument('--intent_space', default=None, help="e.g., intent66")
    cli.add_argument('--question_param', default=None, help="e.g., mc-2_0.5k")
    args = cli.parse_args()

    selectors = (args.model, args.question_type, args.intent_space, args.question_param)

    # Single-run mode: at least one selector was given, so all four are required.
    if any(value is not None for value in selectors):
        if any(value is None for value in selectors):
            raise ValueError("Must specify all four: --model, --question_type, --intent_space, --question_param (or none for all files).")

        cand_dir = f"candidate_answer/{args.model}/{args.question_type}/{args.intent_space}/{args.question_param}"
        cand_file = f"{args.model}_{args.question_type}_{args.intent_space}_{args.question_param}_cand.txt"
        candidate_path = os.path.join(cand_dir, cand_file)
        if not os.path.exists(candidate_path):
            raise FileNotFoundError(f"Candidate file not found: {candidate_path}")

        process_grading(args.model, args.question_type, args.intent_space, args.question_param, candidate_path)
        return

    # Batch mode: grade every well-formed candidate file under the root.
    graded = 0
    for dirpath, _, filenames in os.walk("candidate_answer"):
        for filename in filenames:
            if not filename.endswith('_cand.txt'):
                continue
            candidate_path = os.path.join(dirpath, filename)
            model, question_type, intent_space, question_param = parse_candidate_path(candidate_path)
            if not (model and question_type and intent_space and question_param):
                continue
            process_grading(model, question_type, intent_space, question_param, candidate_path)
            graded += 1

    if graded == 0:
        print("Warning: No valid candidate files found to grade.")

# Run the grader only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
