import openai
from utils import encode_keyframes, random_extract_and_encode_frames, clean_and_convert_to_dict, extract_key_frames_for_Qwen, load_image_file_uris_for_Qwen, random_extract_keyframes_for_gemini, load_keyframes_for_gemini
import dashscope
import anthropic
import google.generativeai as genai
import time
import base64
from PIL import Image
import shutil
import io
from http import HTTPStatus
import dashscope
# Module-level credential setup, executed when this module is imported.
# Replace the placeholders with real keys: dashscope is used by Qwen() and
# genai by Gemini() below.
dashscope.api_key="<Your API Keys>"
genai.configure(api_key="<Your API Keys>")

def Claude3_Opus(prompt, video_path, setting):
    """Send video frames plus ``prompt`` to Claude 3 Opus and return the parsed reply.

    Args:
        prompt: Text appended as the final content part, after all images.
        video_path: Forwarded unchanged to the frame helpers.
        setting: ``'Random'`` attaches the frames returned by
            ``random_extract_and_encode_frames`` as base64 ``image/png``;
            ``'Extracted'`` attaches the frames returned by
            ``encode_keyframes`` as base64 ``image/jpeg``, each preceded by
            an ``"Image {i}:"`` text label (``i`` starts at 0). Any other
            value sends the prompt with no images.

    Returns:
        The result of ``clean_and_convert_to_dict`` applied to the text of
        the first reply block, or ``None`` when the frame helper returned
        nothing or all three attempts failed.
    """
    content = []
    if setting == 'Random':
        encoded_frames = random_extract_and_encode_frames(video_path)
        # Same guard the 'Extracted' branch (and GPT/Qwen) apply: without
        # frames there is nothing about the video to ask.
        if len(encoded_frames) == 0:
            return None
        for frame in encoded_frames:
            content.append({
                "type": "image",
                "source": {
                    "type": "base64",
                    "media_type": "image/png",
                    "data": frame,
                }})
    elif setting == 'Extracted':
        encoded_frames = encode_keyframes(video_path)
        if len(encoded_frames) == 0:
            return None
        for i, frame in enumerate(encoded_frames):
            content.append({
                "type": "text",
                "text": f"Image {i}:"
            })
            content.append({
                "type": "image",
                "source": {
                    "type": "base64",
                    "media_type": "image/jpeg",
                    "data": frame,
                }})
    content.append({"type": "text", "text": prompt})

    # Every failure inside an attempt -- request error or unparseable reply --
    # consumes one attempt and is followed by a 10 s pause. Previously a
    # parse failure slept and then silently abandoned the remaining attempts.
    max_attempts = 3
    for _ in range(max_attempts):
        try:
            client = anthropic.Anthropic(
                # defaults to os.environ.get("ANTHROPIC_API_KEY")
                api_key="<Your Claude API Key>")

            message = client.messages.create(
                model="claude-3-opus-20240229",
                max_tokens=2048,
                temperature=0.9,
                messages=[{"role": "user",
                           "content": content}]
            )

            clean_dict = clean_and_convert_to_dict(message.content[0].text)
            print(clean_dict)
            return clean_dict
        except Exception as e:
            # Best-effort wrapper: report the error and try again.
            print(e)
            time.sleep(10)
    return None

def Qwen(prompt, video_path, setting):
    """Query the DashScope ``qwen-vl-max`` model with video frames and ``prompt``.

    Args:
        prompt: Text placed after the image entries in the user message.
        video_path: Forwarded unchanged to the frame helpers.
        setting: ``'Random'`` uses ``extract_key_frames_for_Qwen(video_path)``;
            ``'Extracted'`` uses
            ``load_image_file_uris_for_Qwen(video_path, scale=0.3)``. Any
            other value sends the prompt with no images.

    Returns:
        The result of ``clean_and_convert_to_dict`` on the reply text, or
        ``None`` when the helper returned no images or both calls came back
        with a non-OK status. Exceptions raised by the SDK call or by
        parsing are not caught here.
    """
    wants_images = True
    if setting == 'Random':
        image_uris = extract_key_frames_for_Qwen(video_path)
    elif setting == 'Extracted':
        image_uris = load_image_file_uris_for_Qwen(video_path, scale=0.3)
    else:
        wants_images = False

    parts = []
    if wants_images:
        if len(image_uris) == 0:
            return None
        for uri in image_uris:
            parts.append({"image": f"{uri}"})
    parts.append({"text": prompt})

    conversation = [{"role": "user", "content": parts}]

    calls_left = 2
    while calls_left > 0:
        calls_left -= 1
        reply = dashscope.MultiModalConversation.call(model='qwen-vl-max', messages=conversation)
        if reply.status_code != HTTPStatus.OK:
            # Report the service error, pause, then use the next call.
            print(reply.code)
            print(reply.message)
            time.sleep(10)
            continue
        reply_text = reply["output"]["choices"][0]["message"]["content"][0]["text"]
        parsed = clean_and_convert_to_dict(reply_text)
        print(parsed)
        return parsed
    return None
            
def GPT(prompt, video_path, setting):
    """Send ``prompt`` plus video frames to an Azure OpenAI chat deployment.

    Each call overwrites the module-level ``openai`` configuration
    (``api_type``, ``api_base``, ``api_version``, ``api_key``).
    NOTE(review): this configuration style and ``openai.ChatCompletion``
    look like the pre-1.0 ``openai`` SDK -- confirm the pinned version.

    Args:
        prompt: Text sent as the first content part, before the images.
        video_path: Forwarded unchanged to the frame helpers.
        setting: ``'Random'`` attaches frames from
            ``random_extract_and_encode_frames`` as ``image/png`` data URLs;
            ``'Extracted'`` attaches frames from ``encode_keyframes`` as
            ``image/jpeg`` data URLs. Any other value sends the prompt with
            no images.

    Returns:
        The result of ``clean_and_convert_to_dict`` on the first choice's
        message content, or ``None`` when the frame helper returned nothing
        or all three attempts failed.
    """
    openai.api_type = "azure"
    openai.api_base = "<Your API URL>"
    openai.api_version = "<Your API Version>"
    openai.api_key = '<Your API Key>'
    engine_name = "<Your Engine Name>"

    content = [{"type": "text", "text": prompt}]

    # Both settings build the same message shape; only the frame source and
    # the declared media type differ.
    if setting == 'Random':
        load_frames, media_type = random_extract_and_encode_frames, "image/png"
    elif setting == 'Extracted':
        load_frames, media_type = encode_keyframes, "image/jpeg"
    else:
        load_frames = None

    if load_frames is not None:
        encoded_frames = load_frames(video_path)
        if len(encoded_frames) == 0:
            return None
        content.extend(
            {"type": "image_url", "image_url": {"url": f"data:{media_type};base64,{frame}"}}
            for frame in encoded_frames
        )

    # Every failure inside an attempt -- request error or unparseable reply --
    # consumes one attempt and is followed by a 10 s pause. Previously a
    # parse failure slept and then silently abandoned the remaining attempts.
    max_attempts = 3
    for _ in range(max_attempts):
        try:
            response = openai.ChatCompletion.create(
                engine=engine_name,
                messages=[
                    {
                        "role": "user",
                        "content": content
                    }
                ],
                max_tokens=2048,
                temperature=0.9,
                top_p=0.9
            )
            clean_dict = clean_and_convert_to_dict(response.choices[0].message.content)
            print(clean_dict)
            return clean_dict
        except Exception as e:
            # Best-effort wrapper: report the error and try again.
            print(e)
            time.sleep(10)
    return None
            

def Gemini(prompt, video_path, setting):
    """Send ``prompt`` plus video frames to ``gemini-1.5-pro-latest``.

    Args:
        prompt: Text sent as the first content item, before the frames.
        video_path: Forwarded unchanged to the frame helpers.
        setting: ``'Random'`` uses ``random_extract_keyframes_for_gemini``;
            ``'Extracted'`` uses ``load_keyframes_for_gemini``.

    Returns:
        The result of ``clean_and_convert_to_dict`` on the reply text, or
        ``None`` when the frame helper returned nothing or all five attempts
        failed.

    Raises:
        ValueError: If ``setting`` is neither ``'Random'`` nor
            ``'Extracted'`` (this used to surface as an unbound-variable
            error on ``frames``).
    """
    print(setting)
    if setting == 'Random':
        frames = random_extract_keyframes_for_gemini(video_path)
    elif setting == 'Extracted':
        frames = load_keyframes_for_gemini(video_path)
    else:
        raise ValueError(
            f"Unsupported setting {setting!r}; expected 'Random' or 'Extracted'"
        )

    if len(frames) == 0:
        return None

    # Built only once we know there is something to send.
    model = genai.GenerativeModel('gemini-1.5-pro-latest')

    content = [prompt]
    content.extend(frames)

    # Every failure inside an attempt -- request error or unusable reply --
    # consumes one attempt and is followed by a 60 s pause. Previously a
    # failure after the request returned slept and then silently abandoned
    # the remaining attempts.
    max_attempts = 5
    for _ in range(max_attempts):
        try:
            response = model.generate_content(content, stream=True)
            # Drain the stream so the full text is available below.
            response.resolve()
            clean_dict = clean_and_convert_to_dict(response.text)
            print(clean_dict)
            return clean_dict
        except Exception as e:
            # Best-effort wrapper: report the error and try again.
            print(e)
            time.sleep(60)
    return None