import argparse
import numpy as np
import os
import torch
import pandas as pd
from typing import Dict, List, Tuple, Any
import signal
from pathlib import Path
from code.utils import compute_population_dist
from code.agents import init_agent, init_agent_wikiarts, get_agent_responses
from code.methods.single import run_single
from code.methods.random import run_random
from code.methods.kmedoids import run_kmedoids
from code.methods.samplegreedy import run_samplegreedy
from code.methods.reppopdemo import run_reppopdemo
from code.methods.reppopmapped import run_reppopmapped_one, run_reppopmapped_two
import json
import time
import joblib
import pickle
from code.utils import get_embedding
from itertools import combinations
import random
# Set at import time, i.e. before main() initialises any model/tokenizer.
# NOTE(review): presumably this silences the Hugging Face `tokenizers`
# fork/parallelism warning — confirm against the tokenizer library in use.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

def setup_signal_handler():
    """Register a SIGALRM handler that turns the alarm into a TimeoutError.

    After this call, delivery of ``SIGALRM`` to the process raises
    ``TimeoutError("Agent query timed out")`` in the main thread. This
    function only installs the handler; it does not arm any alarm itself.
    """
    def _raise_timeout(signum, frame):
        # Signature required by the signal module; both arguments are unused.
        raise TimeoutError("Agent query timed out")

    signal.signal(signal.SIGALRM, _raise_timeout)

def main(args):
    """Run one agent-selection experiment end to end.

    Steps, in order: validate ``args.method``, create ``args.output_dir``,
    seed the RNGs, load the question splits / preprocessed responses / per-user
    training embeddings from ``args.data_dir``, optionally reduce the
    embeddings with PCA (``wikiarts`` domain only), initialise the model and
    dispatch to the ``run_*`` function selected by ``args.method``.

    Args:
        args: Parsed command-line namespace. This function reads
            ``method``, ``output_dir``, ``seed``, ``data_dir``, ``domain`` and
            ``model``; the whole namespace is also forwarded to the selected
            ``run_*`` function.

    Returns:
        Whatever the selected ``run_*`` function returns.

    Raises:
        ValueError: If ``args.method`` is not a known method (checked before
            any file or model is touched), or if no ``.npy`` embedding file
            is found under ``<data_dir>/human_embeddings_train``.
    """
    # Method names grouped by call signature: the second group additionally
    # receives ``user_ids``. Validated first so a typo fails immediately
    # instead of silently doing nothing after the data and model are loaded.
    methods_plain = ('single', 'random', 'reppopdemo')
    methods_with_user_ids = (
        'kmedoids', 'samplegreedy', 'reppopmapped_one', 'reppopmapped_two')
    known_methods = methods_plain + methods_with_user_ids
    if args.method not in known_methods:
        raise ValueError(
            f"Unknown method {args.method!r}; expected one of: "
            f"{', '.join(known_methods)}")

    #---------------------------------------------------------
    # Setup
    #---------------------------------------------------------
    os.makedirs(args.output_dir, exist_ok=True)
    setup_signal_handler()
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    torch.backends.cudnn.deterministic = True

    #---------------------------------------------------------
    # Load data
    #---------------------------------------------------------
    train_questions_path = os.path.join(args.data_dir, 'question_splits/train_questions.json')
    with open(train_questions_path, 'r') as f:
        train_questions = json.load(f)
    test_questions_path = os.path.join(args.data_dir, 'question_splits/test_questions.json')
    with open(test_questions_path, 'r') as f:
        test_questions = json.load(f)
    # SECURITY NOTE: pickle.load executes arbitrary code from the file; only
    # point --data_dir at preprocessing output you produced yourself.
    with open(os.path.join(args.data_dir, 'preprocessed_data.pkl'), 'rb') as f:
        # The pickle holds a 6-tuple; only the first element is used here.
        user_responses_train, _, _, _, _, _ = pickle.load(f)
    if args.domain == 'eedi':
        pd_questions = pd.read_csv('data/eedi/selected_questions.csv')
        pd_questions['key'] = pd_questions['key'].astype(str)
        pd_questions['CorrectAnswer'] = pd_questions['CorrectAnswer'].astype(str)
    elif args.domain == 'opinionqa':
        pd_questions = pd.read_csv('data/opinionqa/American_Trends_Panel_W92/info.csv')
    else:
        pd_questions = None

    # Load human embeddings: one ``<uid>.npy`` file per user.
    # NOTE(review): os.listdir order is filesystem-dependent, so the row order
    # of the embedding matrix (and of ``user_ids``) may differ between
    # machines despite the fixed seed — consider sorting once it is confirmed
    # that no downstream code relies on the current order.
    human_embeddings_train_path = os.path.join(args.data_dir, 'human_embeddings_train')
    human_embeddings_train_dict = {}
    for file in os.listdir(human_embeddings_train_path):
        if file.endswith('.npy'):
            uid = file.split('.')[0]
            human_embeddings_train_dict[uid] = np.load(os.path.join(human_embeddings_train_path, file))
    if not human_embeddings_train_dict:
        raise ValueError(
            f"No .npy embedding files found in {human_embeddings_train_path}")
    user_ids = list(human_embeddings_train_dict.keys())
    user_embeddings_np = np.stack(list(human_embeddings_train_dict.values()), axis=0)

    if args.domain == 'wikiarts':
        from sklearn.decomposition import PCA
        max_components = min(user_embeddings_np.shape[0], user_embeddings_np.shape[1])
        n_components = min(64, max_components - 1)
        print(f"Using PCA with {n_components} components (max possible: {max_components})")

        # Use the clamped value computed above: a hard-coded 64 fails when
        # there are fewer samples/features, and would not match the
        # dimensionality advertised in the saved file name.
        reducer = PCA(n_components=n_components, random_state=args.seed)
        reducer.fit(user_embeddings_np)
        human_embeddings = reducer.transform(user_embeddings_np)
        joblib.dump(reducer, os.path.join(args.output_dir, f'reducer_{n_components}d.joblib'))
    else:
        human_embeddings = user_embeddings_np
        reducer = None

    # Initialize model
    if args.domain == 'wikiarts':
        model, tokenizer = init_agent_wikiarts(args.model, adapter_path=None)
    else:
        model, tokenizer = init_agent(args.model, adapter_path=None)

    # Run selected method
    runners = {
        'single': run_single,
        'random': run_random,
        'reppopdemo': run_reppopdemo,
        'kmedoids': run_kmedoids,
        'samplegreedy': run_samplegreedy,
        'reppopmapped_one': run_reppopmapped_one,
        'reppopmapped_two': run_reppopmapped_two,
    }
    run_method = runners[args.method]
    # Positional arguments shared by every runner, in their expected order.
    shared_args = (
        args, human_embeddings, train_questions, test_questions, pd_questions,
        user_responses_train, model, tokenizer, reducer)
    if args.method in methods_with_user_ids:
        results = run_method(*shared_args, user_ids, args.domain)
    else:
        results = run_method(*shared_args, args.domain)
    return results

def parse_args(argv=None) -> argparse.Namespace:
    """Parse the experiment's command-line options.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. ``None`` (the default) makes argparse read
            ``sys.argv[1:]``, which is the previous behaviour.

    Returns:
        Namespace with ``domain``, ``method``, ``model``, ``temp``,
        ``data_dir``, ``output_dir``, ``n_agents``, ``k_examples``,
        ``sampling_size``, ``seed``, ``distance`` and ``role_play``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--domain', type=str, required=True,
                        help='Domain of the dataset (e.g., opinionqa)')
    parser.add_argument('--method', type=str,
                      required=True, help='Method to use')
    parser.add_argument('--model', type=str, help='Model to use')
    parser.add_argument('--temp', type=float, default=1.0,
                      help='Temperature for LLM sampling')
    parser.add_argument('--data_dir', type=str, default='output/preprocessed_opinionqa',
                      help='Path to data directory')
    parser.add_argument('--output_dir', type=str, default='experiments/opinionqa',
                      help='Path to save experiment results')
    parser.add_argument('--n_agents', type=int, default=10,
                      help='Number of agents')
    parser.add_argument('--k_examples', type=int, default=5,
                      help='Number of examples')
    parser.add_argument('--sampling_size', type=int, default=100,
                        help='Sampling size in each iteration for reppopdemo')
    parser.add_argument('--seed', type=int, default=42,
                      help='Random seed')
    # Previously declared with both a default and required=True, which made
    # the default unreachable; the option is now genuinely optional.
    parser.add_argument('--distance', type=str, default='euclidean',
                   help='Distance metric')
    # --role_play stays accepted (and is on by default); --no_role_play is the
    # only way to switch it off, which was impossible before.
    parser.add_argument('--role_play', dest='role_play', action='store_true', default=True)
    parser.add_argument('--no_role_play', dest='role_play', action='store_false',
                        help='Disable role play (enabled by default)')

    return parser.parse_args(argv)
    
# Script entry point: parse the command line and run the selected experiment.
if __name__ == "__main__":
    args = parse_args()
    main(args)







