from pathlib import Path
import json
import os
from dotenv import load_dotenv
import asyncio
import requests # We'll keep requests and run it in an executor
import mimetypes
from tqdm import tqdm
import pandas as pd
import tempfile # Added for TemporaryDirectory management
import base64

# Model identifier: interpolated into the Hyprlab request URL and used as the
# name of the output sub-directory that results are written to.
MODEL_NAME = "gemini-2.5-pro-preview"  # Updated for Hyprlab

# Prompt sent as the text part of every request, next to the inline audio data.
# It lists the emotion labels to score from 0 to 10 and asks the model to reply
# with a bare JSON object keyed by exactly those labels.
FINAL_PROMPT = '''You are an emotion-analysis engine. You will be given an audio file of one or more speakers. Analyze the tone and delivery to estimate the intensity of the following emotions on a scale from 0 (not present) to 10 (extremely strong):

Affection, Amusement, Anger, Arousal, Astonishment, Authenticity, Awe, Bitterness, Concentration, Confusion, Contemplation, Contempt, Contentment, Disappointment, Disgust, Distress, Doubt, Elation, Embarrassment, Emotional Numbness, Fatigue, Fear, Helplessness, Hope, Impatience and Irritability, Infatuation, Interest, Intoxication, Jealousy / Envy, Longing, Malevolence, Pain, Pleasure, Pride, Relief, Sadness, Sexual Lust, Shame, Sourness, Teasing, Thankfulness, Triumph.

Return **ONLY** a JSON dictionary exactly in this format (no extra text, no markdown):

{
  "Affection": 0,
  "Amusement": 0,
  "Anger": 0,
  "Arousal": 0,
  "Astonishment": 0,
  "Authenticity": 0,
  "Awe": 0,
  "Bitterness": 0,
  "Concentration": 0,
  "Confusion": 0,
  "Contemplation": 0,
  "Contempt": 0,
  "Contentment": 0,
  "Disappointment": 0,
  "Disgust": 0,
  "Distress": 0,
  "Doubt": 0,
  "Elation": 0,
  "Embarrassment": 0,
  "Emotional Numbness": 0,
  "Fatigue": 0,
  "Fear": 0,
  "Helplessness": 0,
  "Hope": 0,
  "Impatience and Irritability": 0,
  "Infatuation": 0,
  "Interest": 0,
  "Intoxication": 0,
  "Jealousy / Envy": 0,
  "Longing": 0,
  "Malevolence": 0,
  "Pain": 0,
  "Pleasure": 0,
  "Pride": 0,
  "Relief": 0,
  "Sadness": 0,
  "Sexual Lust": 0,
  "Shame": 0,
  "Sourness": 0,
  "Teasing": 0,
  "Thankfulness": 0,
  "Triumph": 0
}

Now analyze the following audio file and output the filled-in JSON:'''

# Files are processed in consecutive batches of this size; the files within one
# batch are awaited concurrently before the next batch starts.
BATCH_SIZE = 2 # Number of concurrent tasks

# --- Synchronous helper functions that use requests ---
def _download_audio_sync(url, local_path):
    """Download *url* to *local_path* unless that file already exists.

    The body is streamed into a sibling ``<name>.part`` file and only renamed
    to *local_path* once the transfer has finished. An interrupted or failed
    download therefore never leaves a truncated file behind that a later call
    would mistake for a complete one.

    Args:
        url: HTTP(S) location of the audio file.
        local_path: ``pathlib.Path`` of the destination file.

    Returns:
        *local_path*, whether it was freshly downloaded or already present.

    Raises:
        requests.exceptions.HTTPError: if the server answers with an error
            status (raised by ``raise_for_status``).
    """
    if local_path.exists():
        return local_path

    local_path.parent.mkdir(parents=True, exist_ok=True)
    part_path = local_path.with_name(local_path.name + ".part")
    try:
        # Using the response as a context manager returns the streamed
        # connection to the pool even when the loop below raises.
        with requests.get(url, stream=True, timeout=60) as resp:
            resp.raise_for_status()
            with open(part_path, "wb") as f:
                for chunk in resp.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
        # Atomic on the same filesystem: readers see either nothing or the whole file.
        os.replace(part_path, local_path)
    finally:
        # After a successful replace the temp file is gone; on failure remove the leftover.
        if part_path.exists():
            part_path.unlink()
    return local_path

def _hyprlab_generate_content_sync(audio_path, api_key):
    """Upload one audio file together with FINAL_PROMPT and return the decoded reply.

    This is a blocking call: the file is read fully into memory, base64-encoded
    and posted as inline data to the ``generateContent`` endpoint of MODEL_NAME.

    Args:
        audio_path: Path of the audio file to send; always declared as ``audio/mpeg``.
        api_key: Hyprlab API key, passed as the ``key`` query parameter.

    Returns:
        The response body parsed from JSON.

    Raises:
        requests.exceptions.HTTPError: if the API answers with an error status.
    """
    with open(audio_path, "rb") as audio_file:
        encoded_audio = base64.b64encode(audio_file.read()).decode("utf-8")

    harm_categories = (
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
        "HARM_CATEGORY_CIVIC_INTEGRITY",
    )
    audio_part = {"inline_data": {"mime_type": "audio/mpeg", "data": encoded_audio}}
    prompt_part = {"text": FINAL_PROMPT}
    payload = {
        "system_instruction": {"parts": {"text": "You are a helpful assistant."}},
        "contents": {"parts": [audio_part, prompt_part]},
        # Every listed category gets the same threshold.
        "safetySettings": [
            {"category": category, "threshold": "BLOCK_NONE"}
            for category in harm_categories
        ],
    }

    # NOTE(review): the key travels in the URL, so it can show up in the text of
    # request exceptions -- confirm whether Hyprlab also accepts it as a header.
    endpoint = f"https://api.hyprlab.io/v1beta/models/{MODEL_NAME}:generateContent?key={api_key}"
    response = requests.post(
        endpoint,
        headers={"Content-Type": "application/json"},
        json=payload,
        timeout=180,
    )
    response.raise_for_status()
    return response.json()

# --- Async wrappers for the synchronous functions ---
async def download_audio(url, local_path, loop):
    """Run the blocking download on *loop*'s default executor and return its result.

    Args:
        url: Location of the audio file.
        local_path: Destination path handed to ``_download_audio_sync``.
        loop: Event loop whose executor runs the blocking call.
    """
    pending = loop.run_in_executor(None, _download_audio_sync, url, local_path)
    return await pending

async def hyprlab_generate_content(audio_path, api_key, loop):
    """Run the blocking Hyprlab request on *loop*'s default executor and return the reply.

    Args:
        audio_path: Audio file handed to ``_hyprlab_generate_content_sync``.
        api_key: Hyprlab API key.
        loop: Event loop whose executor runs the blocking call.
    """
    pending = loop.run_in_executor(
        None, _hyprlab_generate_content_sync, audio_path, api_key
    )
    return await pending

def _strip_code_fence(text):
    """Return *text* without a surrounding Markdown code fence (``` or ```json)."""
    text = text.strip()
    if text.startswith("```json"):
        text = text[7:]
    elif text.startswith("```"):
        text = text[3:]
    if text.endswith("```"):
        text = text[:-3]
    return text.strip()


def _extract_emotion_payload(result, rel_path):
    """Convert a raw generateContent response into the object that is saved to disk.

    Returns the parsed JSON reply on success, otherwise an ``{"error": ...}``
    record that embeds the original API response for later inspection.
    """
    candidates = result.get("candidates") if isinstance(result, dict) else None
    if not candidates:
        return {"error": "No candidates in API response", "original_api_response": result}

    # ``content`` may be missing or null; treat both as "no parts".
    content = candidates[0].get("content") or {}
    parts = content.get("parts")
    if not parts:
        return {"error": "No parts in content", "original_api_response": result}

    # The reply text can be split over several parts, so join them instead of
    # reading only the first one. Parts flagged as "thought" are skipped
    # (assumes the Gemini ``Part.thought`` marker -- harmless when absent).
    joined = "".join(
        part.get("text") or "" for part in parts if not part.get("thought")
    )
    text = _strip_code_fence(joined)
    try:
        return json.loads(text)
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON from Hyprlab for {rel_path}: {e}. Raw text: '{text}'")
        return {"error": "JSONDecodeError", "raw_text": text, "original_api_response": result}


async def process_file(audio_path, audio_data_dir, voice_emotions_dir, api_key, loop, pbar_individual):
    """Analyze one audio file and write the result next to its mirrored path.

    The output file is ``voice_emotions_dir / <path relative to audio_data_dir>``
    with ``.json`` appended to the existing suffix. Files whose output already
    exists are skipped. The API call is attempted up to three times, sleeping
    30 s and then 60 s between attempts.

    NOTE: error records (unparseable reply, no candidates, no parts) are written
    to the output file as well, so a later run will skip those files too.

    Args:
        audio_path: Path of the audio file; must be located under *audio_data_dir*.
        audio_data_dir: Root directory of the input audio files.
        voice_emotions_dir: Root directory for the JSON results.
        api_key: Hyprlab API key.
        loop: Event loop used to run the blocking request in an executor.
        pbar_individual: Progress bar; ``update(1)`` is called once when the
            file is skipped, processed, or has failed for the last time.

    Returns:
        A status message starting with ``Skipped``, ``Processed`` or ``Failed``.
    """
    rel_path = audio_path.relative_to(audio_data_dir)
    out_path = voice_emotions_dir / rel_path.with_suffix(rel_path.suffix + ".json")

    if out_path.exists():
        pbar_individual.update(1)
        return f"Skipped (exists): {rel_path}"

    def _redact(text):
        # The request URL carries the API key as a query parameter and request
        # errors quote that URL, so mask the key before anything is printed.
        return text.replace(api_key, "***") if api_key else text

    max_retries = 3
    for attempt in range(max_retries):
        try:
            result = await hyprlab_generate_content(audio_path, api_key, loop)
            final_json_to_save = _extract_emotion_payload(result, rel_path)

            out_path.parent.mkdir(parents=True, exist_ok=True)
            with open(out_path, "w", encoding="utf-8") as f:
                json.dump(final_json_to_save, f, indent=2, ensure_ascii=False)
            pbar_individual.update(1)
            return f"Processed: {rel_path}"
        except Exception as e:
            # Boundary handler: any failure of this attempt is logged and retried.
            reason = _redact(f"{type(e).__name__} {e}")
            error_msg = f"Attempt {attempt + 1}/{max_retries} failed for {audio_path}: {reason}"
            if isinstance(e, requests.exceptions.HTTPError) and e.response is not None:
                error_msg += f" - Response: {_redact(e.response.text)}"
            print(error_msg)

            if attempt == max_retries - 1:
                pbar_individual.update(1)
                return f"Failed (final attempt): {rel_path} - {reason}"
            # Linear back-off: 30 s after the first failure, 60 s after the second.
            await asyncio.sleep(30 * (attempt + 1))
    return f"Failed (exhausted retries): {rel_path}"

async def main_processing_loop(audio_files, audio_data_dir, voice_emotions_dir, google_api_key):
    """Process all audio files in consecutive batches of BATCH_SIZE.

    The files of one batch run concurrently; the next batch starts only after
    every task of the current one has finished. Failure messages and
    exceptions that escaped a task are printed.

    Args:
        audio_files: Sequence of audio file paths to analyze.
        audio_data_dir: Root directory of the input audio files.
        voice_emotions_dir: Root directory for the JSON results.
        google_api_key: API key forwarded to ``process_file``.
    """
    # Inside a coroutine the running loop is the one to hand to run_in_executor.
    loop = asyncio.get_running_loop()
    # Ceiling division: an exact multiple of BATCH_SIZE must not count an extra batch.
    total_batches = (len(audio_files) + BATCH_SIZE - 1) // BATCH_SIZE
    with tqdm(total=len(audio_files), desc="Total files processed") as pbar_individual:
        batch_starts = range(0, len(audio_files), BATCH_SIZE)
        for batch_number, start in enumerate(batch_starts, start=1):
            batch_files = audio_files[start:start + BATCH_SIZE]
            tasks = [
                process_file(audio_path, audio_data_dir, voice_emotions_dir, google_api_key, loop, pbar_individual)
                for audio_path in batch_files
            ]
            print(f"\nProcessing batch {batch_number} of {total_batches} (Size: {len(tasks)})")
            # return_exceptions=True keeps one failing task from cancelling its batch mates.
            results = await asyncio.gather(*tasks, return_exceptions=True)
            for res_msg in results:
                if isinstance(res_msg, Exception):
                    print(f"An unhandled exception occurred in a task: {res_msg}")
                elif "Failed" in str(res_msg):
                    print(res_msg)

def run():
    """Entry point: load configuration, collect the MP3 files and analyze them.

    Reads ``HYPRLAB_API_KEY`` (after loading a ``.env`` file), gathers every
    ``*.mp3`` file below ``audio/`` recursively and writes the results below
    ``./output/<MODEL_NAME>/``.

    Raises:
        RuntimeError: if ``HYPRLAB_API_KEY`` is unset or empty.
    """
    load_dotenv()
    api_key = os.getenv("HYPRLAB_API_KEY")
    if not api_key:
        raise RuntimeError("HYPRLAB_API_KEY not found in .env file or environment variables.")

    output_dir = Path("./output")
    output_dir.mkdir(parents=True, exist_ok=True)

    audio_data_dir = Path("audio")
    # Keep regular files only; a directory whose name ends in .mp3 would match the glob too.
    audio_files = [
        candidate
        for candidate in audio_data_dir.glob("**/*.mp3")
        if candidate.is_file()
    ]
    if len(audio_files) == 0:
        print("No audio files found for analysis.")
        return

    voice_emotions_dir = output_dir / MODEL_NAME
    voice_emotions_dir.mkdir(parents=True, exist_ok=True)

    pipeline = main_processing_loop(audio_files, audio_data_dir, voice_emotions_dir, api_key)
    asyncio.run(pipeline)

if __name__ == '__main__':
    # run() loads the .env file itself as its first step, so no separate
    # load_dotenv() call is needed here.
    run()