import pandas as pd
import asyncio
import os
import argparse
import sys
from datetime import datetime
from .claim_verifier import verify_claims_support_only_async, VerifiedClaim
from .claim_processor import Claim
from typing import List
from dotenv import load_dotenv
import json

# Load variables from a .env file into the process environment as soon as the
# module is imported, before main() runs.
# NOTE(review): presumably this supplies API credentials for the verifier — confirm.
load_dotenv()

def _cell_text(row, column, default=''):
    """Return ``row[column]`` as a string, or ``default`` if absent or blank.

    Blank spreadsheet cells arrive as NaN; calling ``str`` on them directly
    would yield the literal text ``'nan'``, so they are mapped to ``default``.
    """
    value = row.get(column)
    if value is None or (pd.api.types.is_scalar(value) and pd.isna(value)):
        return default
    return str(value)


def _split_urls(text):
    """Split a ``', '``-separated URL string into stripped, non-empty URLs."""
    return [part.strip() for part in text.split(', ') if part.strip()]


def _sanitize_cell(val):
    """Strip characters that are illegal in XML (and thus in .xlsx) from strings.

    Non-string values are returned unchanged.
    """
    if isinstance(val, str):
        return "".join(
            ch for ch in val
            if (0x20 <= ord(ch) <= 0xD7FF)
            or (0xE000 <= ord(ch) <= 0xFFFD)
            or ch in "\t\r\n"
        )
    return val


def _any_true(verifications, attr):
    """Return True if any verification has a truthy ``attr``.

    ``None`` values are falsy, so they never count as a positive.
    """
    return any(getattr(v, attr) for v in verifications)


async def main():
    """Verify the claims in an Excel sheet and write the results to a new workbook.

    Steps: parse CLI arguments, load the prompt template and the input sheet,
    build one ``Claim`` per row, run ``verify_claims_support_only_async``, then
    write the input rows plus prediction columns to a ``Results`` sheet and the
    run configuration / token usage to a ``Metadata`` sheet.

    Exits the process with status 1 if the prompt or input file cannot be
    read, a required column is missing, or verification raises.
    """
    parser = argparse.ArgumentParser(description='Evaluate claims support only.')
    parser.add_argument('--input_file', type=str, default='merged_4-2.xlsx', help='Input Excel file path')
    parser.add_argument('--output_dir', type=str, default='results', help='Output directory')
    parser.add_argument('--model', type=str, default='gpt-5-mini', help='Model to use')
    parser.add_argument('--limit_tokens', action='store_true', default=False, help='Limit tokens using chunking')
    parser.add_argument('--top_k', type=int, default=2, help='Top K chunks for token limiting')
    parser.add_argument('--chunk_size', type=int, default=1000, help='Chunk size for token limiting')
    parser.add_argument('--batch_size', type=int, default=10, help='Batch size for verification')
    parser.add_argument('--prompt_file', type=str, default='prompts/claim_verification_v2.txt', help='Path to prompt file')
    parser.add_argument('--embedding_method', type=str, choices=["bm25", "openai"], default="bm25", help='Embedding method for source filtering')
    parser.add_argument('--reasoning_effort', type=str, default='high', choices=['low', 'medium', 'high'], help='Reasoning effort for reasoning models')

    args = parser.parse_args()

    # NOTE: an earlier override that forced limit_tokens=True whenever
    # batch_size > 1 is disabled; the flag is used exactly as given on the CLI.

    # Load prompt
    try:
        with open(args.prompt_file, 'r', encoding='utf-8') as f:
            prompt_template = f.read()
    except Exception as e:
        print(f"Error loading prompt file {args.prompt_file}: {e}")
        sys.exit(1)

    input_file = args.input_file
    # Model names may contain '/', which must not create nested directories.
    model_safe_name = args.model.replace('/', '_')
    model_dir = os.path.join(args.output_dir, model_safe_name)
    # Created up front so an unwritable output location fails before any
    # verification work is done.
    os.makedirs(model_dir, exist_ok=True)

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_file = os.path.join(model_dir, f"evaluated_claims_support_{timestamp}.xlsx")

    print(f"Loading {input_file}...")
    try:
        df = pd.read_excel(input_file)
    except Exception as e:
        print(f"Error reading input file: {e}")
        sys.exit(1)

    # Required columns, each with one accepted alternative spelling.
    column_aliases = {'claim': 'Claim', 'URLs': 'url'}
    for col, alias in column_aliases.items():
        if col in df.columns:
            continue
        if alias in df.columns:
            print(f"Column '{col}' not found; using '{alias}' instead.")
            df.rename(columns={alias: col}, inplace=True)
        else:
            print(f"Missing required column: {col}")
            sys.exit(1)

    print(f"Processing {len(df)} rows...")

    # Build the claims and remember each row's lookup key so results can be
    # mapped back without re-deriving (and possibly mismatching) the key.
    claims = []
    row_keys = []
    for idx, row in df.iterrows():
        # Use 'position' if available, else a key derived from the row index.
        position = _cell_text(row, 'position', f'Row_{idx}')
        claim_text = _cell_text(row, 'claim')
        row_keys.append((position, claim_text))
        claims.append(Claim(
            position=position,
            claim=claim_text,
            claim_type=_cell_text(row, 'class', 'A'),  # Default to A
            rationale=_cell_text(row, 'rationale'),
            numeric=True,  # Always check numeric
            citations=_split_urls(_cell_text(row, 'URLs')),
        ))

    print(f"Verifying {len(claims)} claims with model {args.model}...")
    print(f"Configuration: Limit Tokens={args.limit_tokens}, Batch Size={args.batch_size}")

    # Verify claims
    try:
        results, context_info, total_usage = await verify_claims_support_only_async(
            claims,
            model=args.model,
            return_context=True,
            limit_tokens=args.limit_tokens,
            top_k=args.top_k,
            chunk_size=args.chunk_size,
            batch_size=args.batch_size,
            prompt_template=prompt_template,
            embedding_method=args.embedding_method,
            reasoning_effort=args.reasoning_effort
        )
    except Exception as e:
        print(f"Error during verification: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)

    # Map results back to rows, keyed by (position, claim_text).
    result_map = {(res.claim.position, res.claim.claim): res for res in results}

    predicted_results = []
    predicted_explanations = []
    numeric_errors = []
    relevant_contexts = []
    corresponding_scopes = []

    for key in row_keys:
        verified_claim = result_map.get(key)
        if verified_claim is None:
            predicted_results.append('error')
            predicted_explanations.append('Verification failed or not found')
            numeric_errors.append(None)
            relevant_contexts.append(None)
            corresponding_scopes.append(None)
            continue

        final_res, explanation = verified_claim.final_result_and_explanation()
        predicted_results.append(final_res)
        predicted_explanations.append(explanation)

        verifications = verified_claim.verifications
        if not verifications:
            numeric_errors.append(None)
            relevant_contexts.append(None)
            corresponding_scopes.append(None)
            continue

        # Each detail flag is True if ANY verification reported it as True.
        numeric_errors.append(_any_true(verifications, 'numeric_error'))
        # 'relavant_context' (sic) is the attribute name read from the
        # verification objects; it must stay spelled this way.
        relevant_contexts.append(_any_true(verifications, 'relavant_context'))
        corresponding_scopes.append(_any_true(verifications, 'corresponding_scope'))

    # Add to DataFrame
    df['predicted_result'] = predicted_results
    df['predicted_explanation'] = predicted_explanations
    df['predicted_numeric_error'] = numeric_errors
    df['predicted_relevant_context'] = relevant_contexts
    df['predicted_corresponding_scope'] = corresponding_scopes

    print(f"Saving results to {output_file}...")

    # Sanitize for Excel. DataFrame.applymap is deprecated since pandas 2.1 in
    # favour of DataFrame.map; fall back to applymap only on older versions.
    elementwise = getattr(pd.DataFrame, 'map', None) or pd.DataFrame.applymap
    df = elementwise(df, _sanitize_cell)

    with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
        df.to_excel(writer, sheet_name='Results', index=False)

        # Metadata: one row describing the run configuration and usage totals.
        meta = {
            'model': [args.model],
            'prompt_file': [args.prompt_file],
            'timestamp': [timestamp],
            'limit_tokens': [args.limit_tokens],
            'batch_size': [args.batch_size],
            'top_k': [args.top_k],
            'chunk_size': [args.chunk_size],
            'total_cost': [total_usage.get('cost', 0.0)],
            'total_prompt_tokens': [total_usage.get('prompt_tokens', 0)],
            'total_completion_tokens': [total_usage.get('completion_tokens', 0)],
            'total_tokens': [total_usage.get('total_tokens', 0)],
            'reasoning_effort': [args.reasoning_effort],
            'embedding_method': [args.embedding_method]
        }
        pd.DataFrame(meta).to_excel(writer, sheet_name='Metadata', index=False)

    print("Done.")

# Script entry point: run the async pipeline only when executed directly,
# not when imported.
# NOTE: this file uses relative imports (from .claim_verifier / .claim_processor),
# so it has to be run as a module inside its package (python -m <package>.<module>).
if __name__ == "__main__":
    asyncio.run(main())
