import json
import openai
from tqdm import tqdm
import pandas as pd
import argparse
import os
import sys
from typing import Dict, List, Optional
from openai import AzureOpenAI
import re

def parse_args() -> argparse.Namespace:
    """Build the command-line parser and return the parsed options.

    Returns:
        A namespace with ``start`` and ``end`` (the slice bounds, ``end``
        defaulting to ``None``), ``output_dir`` and the required ``csv_path``.
    """
    cli = argparse.ArgumentParser(
        description="Run zero-shot emotion classification over a slice of the dataset.",
    )

    # (flag, add_argument keyword options), registered in declaration order.
    option_specs = (
        ("--start", {"type": int, "default": 0, "help": "Start index (inclusive) of the slice."}),
        ("--end", {"type": int, "default": None, "help": "End index (inclusive) of the slice."}),
        ("--output_dir", {"type": str, "default": "emotion_results", "help": "Directory to write JSON results."}),
        ("--csv_path", {"type": str, "required": True, "help": "Path to the input CSV with columns video_id,story"}),
    )
    for flag, options in option_specs:
        cli.add_argument(flag, **options)

    return cli.parse_args()

# ---------------------------------------------------------------------------
# Zero-shot classification system prompt
# ---------------------------------------------------------------------------
# One sentence per entry; the entries are joined with single spaces to form
# the system message sent with every classification request.
SYSTEM_PROMPT = " ".join(
    (
        "You are an expert content analyst.",
        "You will be given a dictionary called emotion_vocab, which lists emotions and their definitions.",
        "You will also be given the STORY text of a video advertisement.",
        "Your task is to choose the SINGLE most relevant emotion key from emotion_vocab that best captures how viewers are likely to feel.",
        "Output ONLY the emotion key, nothing else.",
    )
)

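# Emotion vocabulary. The commented-out assignment below is an alternative, finer-grained label set; the uncommented `topics` that follows it is the one in use.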
# topics = "Emotion_vocab = {'active': 'active(energetic, adventurous, vibrant, enthusiastic, playful)', 'afraid': 'afraid(horrified, scared, fearful)', 'alarmed': 'alarmed(concerned, worried, anxious, overwhelmed)', 'alert': 'alert(attentive, curious)', 'amazed': 'amazed(surprised, astonished, awed, fascinated, intrigued)', 'amused': 'amused(humored, laughing)', 'angry': 'angry(annoyed, irritated)', 'calm': 'calm(soothed, peaceful, comforted, fullfilled, cozy)', 'cheerful': 'cheerful(delighted, happy, joyful, carefree, optimistic)', 'confident': 'confident(assured, strong, healthy)', 'conscious': 'conscious(aware, thoughtful, prepared)', 'creative': 'creative(inventive, productive)', 'disturbed': 'disturbed(disgusted, shocked)', 'eager': 'eager(hungry, thirsty, passionate)', 'educated': 'educated(informed, enlightened, smart, savvy, intelligent)', 'emotional': 'emotional(vulnerable, moved, nostalgic, reminiscent)', 'empathetic': 'empathetic(sympathetic, supportive, understanding, receptive)', 'fashionable': 'fashionable(trendy, elegant, beautiful, attractive, sexy)', 'feminine': 'feminine(womanly, girlish)', 'grateful': 'grateful(thankful)', 'inspired': 'inspired(motivated, ambitious, empowered, determined)', 'jealous': 'jealous', 'loving': 'loving(loved, romantic)', 'manly': 'manly', 'persuaded': 'persuaded(impressed, enchanted, immersed)', 'pessimistic': 'pessimistic(skeptical)', 'proud': 'proud(patriotic)', 'sad': 'sad(depressed, upset, betrayed, distant)', 'thrifty': 'thrifty(frugal)', 'youthful': 'youthful(childlike)'}"

# Active label set: each emotion key mapped to a short definition.
_EMOTION_DEFINITIONS = {
    'joy': 'positive feelings such as happiness, delight, cheerfulness',
    'trust': 'feelings of confidence, safety, reliability, comfort',
    'fear': 'feelings of worry, scare, anxiety, concern',
    'anger': 'feelings of annoyance, irritation, outrage',
    'disgust': 'feelings of aversion, repulsion, disturbance',
    'anticipation': 'feelings of excitement, eagerness, curiosity about the future',
    'unclear': 'emotion is ambiguous or cannot be determined',
}
# Prompt fragment: the mapping rendered as a Python dict literal (single-quoted
# keys and values, insertion order preserved) after the "Emotion_vocab = " label.
topics = f"Emotion_vocab = {_EMOTION_DEFINITIONS}"
def _cell_to_text(value) -> str:
    """Return a CSV cell as text, mapping missing values (None/NaN) to ''."""
    if isinstance(value, str):
        return value
    # pandas represents empty CSV cells as NaN; str(nan) would otherwise leak
    # the literal text "nan" into the prompt, the URL and the saved results.
    if pd.isna(value):
        return ""
    return str(value)


def _write_json_atomic(path: str, payload: List[Dict[str, str]]) -> None:
    """Write *payload* to *path* through a temp file and an atomic rename."""
    tmp_path = f"{path}.tmp"
    with open(tmp_path, "w", encoding="utf-8") as f:
        json.dump(payload, f, indent=4)
    # os.replace swaps the file in one step, so an interrupted run never
    # leaves a half-written (unparseable) results file behind.
    os.replace(tmp_path, path)


def _request_emotion(client, cleaned_text: str) -> str:
    """Ask the model for one emotion key for *cleaned_text* and normalize it.

    Raises:
        ValueError: if the response carries no message content.
        Exception: whatever the OpenAI client raises for a failed request.
    """
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": f"{topics}\n\nStory: {cleaned_text}"},
        ],
        max_tokens=20,
        temperature=0.0,
        n=1,
    )
    content = response.choices[0].message.content
    if content is None:
        raise ValueError("model returned no message content")
    # Lower-case and drop surrounding quotes/punctuation the model may add.
    return content.strip().lower().strip("'\". ,")


def main():
    """Classify the emotion of each story in a slice of the input CSV.

    Reads the CSV given by ``--csv_path``, selects rows ``--start``..``--end``
    (both inclusive), asks the model for one emotion key per row and rewrites
    the JSON results file in ``--output_dir`` after every row, so partial
    progress survives an interruption.

    A failed API call is recorded with ``predicted_topic`` set to
    ``"error_api"`` rather than aborting the run. The process exits with
    status 1 if ``--start`` is negative or the CSV cannot be read.
    """
    args = parse_args()

    if args.start < 0:
        # A negative start would silently wrap around to the end of the list.
        print(f"Error: --start must be non-negative, got {args.start}")
        sys.exit(1)

    os.makedirs(args.output_dir, exist_ok=True)

    # Setup Azure OpenAI client
    for env_var in ("OPENAI_API_KEY", "AZURE_OPENAI_ENDPOINT"):
        if not os.getenv(env_var):
            print(
                f"Warning: {env_var} is not set; falling back to a placeholder value.",
                file=sys.stderr,
            )
    client = AzureOpenAI(
        api_key=os.getenv("OPENAI_API_KEY", "YOUR_OPENAI_API_KEY"),
        api_version="2024-02-15-preview",
        azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT", "https://your-azure-openai-endpoint/"),
    )

    # Load CSV data
    try:
        df = pd.read_csv(args.csv_path)
    except Exception as e:
        print(f"Error reading CSV {args.csv_path}: {e}")
        sys.exit(1)

    all_records = df.to_dict(orient='records')

    # Determine slice for this run (end is inclusive and clamped to the data)
    last_idx = len(all_records) - 1
    start_idx = args.start
    end_idx = last_idx if args.end is None else min(args.end, last_idx)
    slice_records = all_records[start_idx : end_idx + 1]

    print(f"Processing slice {start_idx}–{end_idx} (n={len(slice_records)})")

    output_path = os.path.join(args.output_dir, f"emotion_results_{start_idx}_{end_idx}.json")

    if not slice_records:
        # Nothing to classify: do not claim that a results file was saved.
        print("No records in the requested slice; nothing was written.")
        return

    results = []
    for rec in tqdm(slice_records, desc=f"Emotion Eval {start_idx}-{end_idx}"):
        # Reset per row so the error message below never shows a stale id.
        video_id = ""
        try:
            video_id = _cell_to_text(rec.get('video_id', '')).strip()
            # split()/join collapses every whitespace run (newlines and form
            # feeds included) into a single space.
            cleaned_text = ' '.join(_cell_to_text(rec.get('story', '')).split())

            try:
                pred_topic = _request_emotion(client, cleaned_text)
            except Exception as e:
                print(f"Error during OpenAI call for key {video_id}: {e}")
                pred_topic = "error_api"

            # Store results
            results.append({
                'video_id': video_id,
                'url': f"https://www.youtube.com/watch?v={video_id}" if video_id else "",
                'story': cleaned_text,
                'predicted_topic': pred_topic,
            })

            # Incremental save
            _write_json_atomic(output_path, results)

        except Exception as e:
            print(f"Error processing key {video_id}: {e}")
            continue

    print(f"Finished processing. Results saved to {output_path}")

if __name__ == "__main__":
    main()




