import sys
import os
import io, asyncio
from PIL import Image
parent_dir = os.path.abspath('.')
sys.path.append(parent_dir)
import base64
import json
import anthropic
import requests
import replicate
import google.generativeai as genai
from dotenv import load_dotenv
from tools.image_to_url import local_image_to_data_url
from concurrent.futures import as_completed
from openai import AsyncAzureOpenAI, AsyncOpenAI, AzureOpenAI, OpenAI
load_dotenv()

# os.environ['http_proxy'] = os.getenv("HTTP_PROXY")
# os.environ['https_proxy'] = os.getenv("HTTPS_PROXY")
# Mirror REPLICATE_API_KEY into REPLICATE_API_TOKEN (presumably the variable
# the `replicate` client reads -- confirm against its docs).
# os.environ only accepts str values, so skip the assignment when the key is
# unset instead of crashing at import time with a TypeError.
_replicate_api_key = os.getenv("REPLICATE_API_KEY")
if _replicate_api_key is not None:
    os.environ['REPLICATE_API_TOKEN'] = _replicate_api_key

def encode_image_to_base64(image):
    """Serialize an image into a base64-encoded text string.

    Args:
        image: Object exposing a ``format`` attribute and a
            ``save(fp, format=...)`` method (e.g. a ``PIL.Image.Image``).

    Returns:
        The base64 text (UTF-8 decoded) of the bytes written by ``image.save``.
    """
    # An image that was not read from a file has ``format`` set to None, and
    # saving to an in-memory buffer gives the encoder no filename to infer a
    # format from. Fall back to PNG so such images can still be encoded.
    image_format = image.format or "PNG"
    buffered = io.BytesIO()
    image.save(buffered, format=image_format)
    return base64.b64encode(buffered.getvalue()).decode('utf-8')

def gpt4o(message, image_path=None):
    """Send a prompt, optionally with an image, to ``gpt-4o`` synchronously.

    Args:
        message: Prompt text for the single user turn.
        image_path: Optional image reference. It is passed through verbatim
            as the ``image_url`` URL, so it presumably must already be an
            http(s) or ``data:`` URL rather than a filesystem path -- TODO
            confirm against callers.

    Returns:
        The message content of the first returned choice.
    """
    client = OpenAI(
        api_key=os.environ.get("OPENAI_API_KEY"),  # This is the default and can be omitted
    )
    if image_path is None:
        # Text-only requests send the prompt as a plain string.
        content = message
    else:
        content = [
            {"type": "text", "text": message},
            {
                "type": "image_url",
                "image_url": {"url": f"{image_path}"},
            },
        ]

    # Both branches target gpt-4o. The image branch previously sent the
    # request to gpt-4o-mini, contradicting the function name and the
    # text-only branch.
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": content}],
    )

    return response.choices[0].message.content
    
def intern_VL(prompt, image_path):
    """Ask the ``internvl2.5-latest`` chat model about a local image.

    Args:
        prompt: Text placed in the user turn.
        image_path: Path of the image file; its raw bytes are base64-encoded
            and attached as a data URL that is always labelled ``image/jpeg``.

    Returns:
        The message content of the first returned choice.
    """
    vl_client = OpenAI(
        base_url="https://chat.intern-ai.org.cn/api/v1/",
        api_key=os.getenv("INTERN_VL_KEY"),  # pass the raw token here, without a "Bearer" prefix
    )

    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes).decode("utf-8")

    user_turn = {
        "role": "user",
        "content": [
            {"type": "text", "text": prompt},
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded}"}},
        ],
    }
    reply = vl_client.chat.completions.create(
        model="internvl2.5-latest",
        messages=[user_turn],
    )
    return reply.choices[0].message.content

async def async_gpt4o(message, image_path=None):
    """Asynchronously query ``gpt-4o`` with a prompt and an optional local image.

    Args:
        message: Prompt text for the single user turn.
        image_path: Optional path handed to ``Image.open``. The image is
            base64-encoded and attached as a data URL that is always labelled
            ``image/jpeg``.

    Returns:
        The message content of the first returned choice.
    """
    client = AsyncOpenAI(
        api_key=os.environ.get("OPENAI_API_KEY"),  # This is the default and can be omitted
    )
    if image_path is None:
        # Text-only requests send the prompt as a plain string.
        user_content = message
    else:
        encoded = encode_image_to_base64(Image.open(image_path))
        user_content = [
            {"type": "text", "text": message},
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded}"}},
        ]

    completion = await client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": user_content}],
    )
    return completion.choices[0].message.content

async def async_gpt4o_mini(message, image_path=None):
    """Asynchronously query ``gpt-4o-mini`` with a prompt and an optional image.

    Args:
        message: Prompt text for the single user turn.
        image_path: Optional path handed to ``Image.open``; when given, the
            image is base64-encoded and sent as an ``image/jpeg`` data URL.

    Returns:
        The message content of the first returned choice.
    """
    client = AsyncOpenAI(
        api_key=os.environ.get("OPENAI_API_KEY"),  # This is the default and can be omitted
    )

    payload = message  # plain string unless an image is attached below
    if image_path is not None:
        picture = Image.open(image_path)
        b64 = encode_image_to_base64(picture)
        payload = [
            {"type": "text", "text": message},
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{b64}"}},
        ]

    result = await client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": payload}],
    )
    return result.choices[0].message.content
    
async def async_azure_gpt4o(message, image_path=None):
    """Asynchronously query ``gpt-4o`` through an Azure OpenAI client.

    The key, API version and endpoint are read from the ``Azure_API_KEY``,
    ``Azure_API_VERSION`` and ``Azure_ENDPOINT`` environment variables.

    Args:
        message: Prompt text for the single user turn.
        image_path: Optional path handed to ``Image.open``; when given, the
            image is base64-encoded and sent as an ``image/jpeg`` data URL.

    Returns:
        The message content of the first returned choice.
    """
    azure_client = AsyncAzureOpenAI(
        api_key=os.getenv("Azure_API_KEY"),
        api_version=os.getenv("Azure_API_VERSION"),
        azure_endpoint=os.getenv("Azure_ENDPOINT")
    )
    if image_path is None:
        user_content = message
    else:
        encoded = encode_image_to_base64(Image.open(image_path))
        data_url = f"data:image/jpeg;base64,{encoded}"
        user_content = [
            {"type": "text", "text": message},
            {"type": "image_url", "image_url": {"url": data_url}},
        ]

    # NOTE(review): on Azure the model argument presumably names a deployment -- confirm.
    completion = await azure_client.chat.completions.create(
        model='gpt-4o',
        messages=[{"role": "user", "content": user_content}],
    )
    return completion.choices[0].message.content

async def async_azure_gpt4o_mini(message, image_path=None):
    """Asynchronously query ``gpt-4o-mini`` through an Azure OpenAI client.

    The key, API version and endpoint are read from the ``Azure_API_KEY``,
    ``Azure_API_VERSION`` and ``Azure_ENDPOINT`` environment variables.

    Args:
        message: Prompt text for the single user turn.
        image_path: Optional path handed to ``Image.open``; when given, the
            image is base64-encoded and sent as an ``image/jpeg`` data URL.

    Returns:
        The message content of the first returned choice.
    """
    azure_client = AsyncAzureOpenAI(
        api_key=os.getenv("Azure_API_KEY"),
        api_version=os.getenv("Azure_API_VERSION"),
        azure_endpoint=os.getenv("Azure_ENDPOINT"),
    )

    payload = message  # plain string unless an image is attached below
    if image_path is not None:
        picture = Image.open(image_path)
        b64 = encode_image_to_base64(picture)
        payload = [
            {"type": "text", "text": message},
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{b64}"}},
        ]

    result = await azure_client.chat.completions.create(
        model='gpt-4o-mini',
        messages=[{"role": "user", "content": payload}],
    )
    return result.choices[0].message.content

async def async_claude_3_5_sonnet(prompt, image_path=None):
    """Asynchronously query ``claude-3-5-sonnet-20241022``.

    Args:
        prompt: Text of the single user turn.
        image_path: Optional path of an image file whose raw bytes are
            base64-encoded and sent ahead of the text.

    Returns:
        The ``text`` of the first content block of the reply.

    Note:
        ``temperature=0.6`` is only applied to text-only requests; the image
        request does not set a temperature.
    """
    client = anthropic.AsyncAnthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
    if image_path is None:
        message = await client.messages.create(
            model='claude-3-5-sonnet-20241022',
            temperature=0.6,
            max_tokens=1024,
            messages=[{"role": "user", "content": prompt}]
        )
    else:
        # Read through a context manager so the file handle is closed
        # deterministically instead of being left to the garbage collector.
        with open(image_path, "rb") as img_file:
            image_data = base64.b64encode(img_file.read()).decode("utf-8")
        # NOTE(review): the media type is hard-coded, so this presumably
        # expects WebP input -- confirm that callers only pass WebP files.
        media_type = "image/webp"
        message = await client.messages.create(
            model="claude-3-5-sonnet-20241022",
            max_tokens=1024,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image",
                            "source": {
                                "type": "base64",
                                "media_type": media_type,
                                "data": image_data,
                            },
                        },
                        {
                            "type": "text",
                            "text": prompt
                        }
                    ],
                }
            ],
        )
    return message.content[0].text

async def async_claude_3_5_haiku(prompt, image_path=None):
    """Asynchronously query ``claude-3-5-haiku-latest``.

    Args:
        prompt: Text of the single user turn.
        image_path: Optional path of an image file whose raw bytes are
            base64-encoded and sent ahead of the text, labelled ``image/webp``.

    Returns:
        The ``text`` of the first content block of the reply.

    Note:
        ``temperature=0.6`` is only applied to text-only requests.
    """
    client = anthropic.AsyncAnthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))

    # Arguments shared by both request shapes; the rest is filled in per branch.
    request = {"model": "claude-3-5-haiku-latest", "max_tokens": 1024}
    if image_path is None:
        request["temperature"] = 0.6
        request["messages"] = [{"role": "user", "content": prompt}]
    else:
        with open(image_path, "rb") as handle:
            encoded = base64.b64encode(handle.read()).decode("utf-8")
        image_block = {
            "type": "image",
            "source": {"type": "base64", "media_type": "image/webp", "data": encoded},
        }
        text_block = {"type": "text", "text": prompt}
        request["messages"] = [{"role": "user", "content": [image_block, text_block]}]

    reply = await client.messages.create(**request)
    return reply.content[0].text

async def async_claude_3_haiku(prompt, image_path=None):
    """Asynchronously query ``claude-3-haiku-20240307``.

    Args:
        prompt: Text of the single user turn.
        image_path: Optional path of an image file whose raw bytes are
            base64-encoded and sent ahead of the text, labelled ``image/webp``.

    Returns:
        The ``text`` of the first content block of the reply.

    Note:
        ``temperature=0.6`` is only applied to text-only requests.
    """
    client = anthropic.AsyncAnthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))

    if image_path is None:
        # Text-only request: the prompt is sent as a plain string.
        request = {
            "model": "claude-3-haiku-20240307",
            "temperature": 0.6,
            "max_tokens": 1024,
            "messages": [{"role": "user", "content": prompt}],
        }
    else:
        with open(image_path, "rb") as handle:
            raw_bytes = handle.read()
        encoded = base64.b64encode(raw_bytes).decode("utf-8")
        blocks = [
            {
                "type": "image",
                "source": {"type": "base64", "media_type": "image/webp", "data": encoded},
            },
            {"type": "text", "text": prompt},
        ]
        request = {
            "model": "claude-3-haiku-20240307",
            "max_tokens": 1024,
            "messages": [{"role": "user", "content": blocks}],
        }

    reply = await client.messages.create(**request)
    return reply.content[0].text

async def async_gemini_1_5_flash(prompt, image_path=None):
    """Asynchronously generate content with ``gemini-1.5-flash``.

    Args:
        prompt: Text part of the request.
        image_path: Optional path handed to ``Image.open``; the opened image
            is appended to the request parts after the prompt.

    Returns:
        The ``text`` attribute of the generation response.
    """
    genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
    gemini = genai.GenerativeModel(model_name="gemini-1.5-flash")

    parts = [prompt]
    if image_path is not None:
        parts.append(Image.open(image_path))

    result = await gemini.generate_content_async(parts)
    return result.text

async def async_gemini_1_5_pro(prompt, image_path=None):
    """Asynchronously generate content with ``gemini-1.5-pro``.

    Args:
        prompt: Text part of the request.
        image_path: Optional path handed to ``Image.open``; the opened image
            follows the prompt in the request parts.

    Returns:
        The ``text`` attribute of the generation response.
    """
    genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
    gemini = genai.GenerativeModel(model_name="gemini-1.5-pro")

    # The prompt always comes first; the image, when present, follows it.
    request_parts = [prompt] if image_path is None else [prompt, Image.open(image_path)]
    result = await gemini.generate_content_async(request_parts)
    return result.text

async def async_glm_4v_plus(prompt, image_path=None):
    """Asynchronously query ``glm-4v-plus`` via the bigmodel.cn endpoint.

    Args:
        prompt: Prompt text for the single user turn.
        image_path: Optional path handed to ``Image.open``; when given, the
            image is base64-encoded and sent as an ``image/jpeg`` data URL.

    Returns:
        The message content of the first returned choice.
    """
    client = AsyncOpenAI(
        api_key=os.getenv("ZHIPU_API_KEY"),
        base_url="https://open.bigmodel.cn/api/paas/v4/",
    )

    if image_path is None:
        # Text-only requests send the prompt as a plain string.
        user_content = prompt
    else:
        encoded = encode_image_to_base64(Image.open(image_path))
        user_content = [
            {"type": "text", "text": prompt},
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded}"}},
        ]

    completion = await client.chat.completions.create(
        model="glm-4v-plus",
        messages=[{"role": "user", "content": user_content}],
    )
    return completion.choices[0].message.content

async def async_openai(query, image_file=None):
    """POST a ``gpt-4o`` chat request to the URL in the ``base_url`` env var.

    Args:
        query: Prompt text for the single user turn.
        image_file: Optional path handed to ``Image.open``; when truthy, the
            image is base64-encoded and appended as an ``image/jpeg`` data URL.
            Defaults to ``None`` for parity with the sibling helpers.

    Returns:
        The message content of the first choice, or ``""`` when the response
        body cannot be parsed into that shape.

    Raises:
        requests.exceptions.RequestException: For transport-level failures
            (connection errors, invalid URL). HTTP error statuses are only
            printed, not raised.
    """
    data = {
        "messages": [{"role": "user", "content": [{"type": "text", "text": query}]}],
        "model": "gpt-4o",
    }
    if image_file:
        image = Image.open(image_file)
        base64_image = encode_image_to_base64(image)
        image_message = {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}
        data['messages'][0]["content"].append(image_message)

    headers = {
        "content-type": "application/json",
        "Authorization": f"Bearer {os.getenv('api_key')}"
    }
    url = os.getenv('base_url')

    # requests is synchronous: calling it directly inside a coroutine would
    # block the event loop for the whole HTTP round trip. Run it in the default
    # executor so other tasks keep making progress.
    loop = asyncio.get_running_loop()
    response = await loop.run_in_executor(
        None, lambda: requests.post(url, json=data, headers=headers)
    )
    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError as e:
        # Best effort: report the failure, then still try to parse the body.
        print(f"Error making request: {e}")
    try:
        response_json = response.json()
        return response_json["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError, ValueError) as e:
        # TypeError covers bodies where an expected container is null or not
        # subscriptable (e.g. "choices": null).
        print(f"Error parsing response: {e}")
        print(response.text)
        return ""
    
async def async_llama_3_2(prompt, image_path=None):
    """Asynchronously query ``meta-llama/llama-3.2-90b-vision-instruct`` on OpenRouter.

    Args:
        prompt: Text part of the single user turn.
        image_path: Optional path handed to ``Image.open``; when given, the
            image is base64-encoded and appended as an ``image/jpeg`` data URL.

    Returns:
        The message content of the first returned choice.
    """
    client = AsyncOpenAI(
        base_url="https://openrouter.ai/api/v1",
        api_key=os.getenv("OPENROUTER_API_KEY"),
    )

    # The text part is always present; the image part is optional.
    parts = [{"type": "text", "text": prompt}]
    if image_path is not None:
        encoded = encode_image_to_base64(Image.open(image_path))
        parts.append(
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded}"}}
        )

    completion = await client.chat.completions.create(
        model="meta-llama/llama-3.2-90b-vision-instruct",
        messages=[{"role": "user", "content": parts}],
    )
    return completion.choices[0].message.content

async def async_qwen_2_vl(prompt, image_path=None):
    """Asynchronously query ``qwen/qwen-2-vl-72b-instruct`` on OpenRouter.

    Args:
        prompt: Text part of the single user turn.
        image_path: Optional path handed to ``Image.open``; when given, the
            image is base64-encoded and appended as an ``image/jpeg`` data URL.

    Returns:
        The message content of the first returned choice.
    """
    client = AsyncOpenAI(
        base_url="https://openrouter.ai/api/v1",
        api_key=os.getenv("OPENROUTER_API_KEY"),
    )

    text_part = {"type": "text", "text": prompt}
    if image_path is None:
        user_content = [text_part]
    else:
        picture = Image.open(image_path)
        b64 = encode_image_to_base64(picture)
        image_part = {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{b64}"}}
        user_content = [text_part, image_part]

    result = await client.chat.completions.create(
        model="qwen/qwen-2-vl-72b-instruct",
        messages=[{"role": "user", "content": user_content}],
    )
    return result.choices[0].message.content

async def async_ui_tar(prompt, image_path=None):
    """Asynchronously query ``bytedance-research/ui-tars-72b:free`` on OpenRouter.

    Args:
        prompt: Text part of the single user turn.
        image_path: Optional path handed to ``Image.open``; when given, the
            image is base64-encoded and appended as an ``image/jpeg`` data URL.

    Returns:
        The message content of the first returned choice.
    """
    # One model id for both request shapes. The image branch previously sent
    # the request to "qwen/qwen-2-vl-72b-instruct", a copy-paste slip that
    # made image queries bypass UI-TARS entirely.
    model = "bytedance-research/ui-tars-72b:free"
    client = AsyncOpenAI(
        base_url="https://openrouter.ai/api/v1",
        api_key=os.getenv("OPENROUTER_API_KEY"),
    )

    content = [{"type": "text", "text": prompt}]
    if image_path is not None:
        image = Image.open(image_path)
        base64_image = encode_image_to_base64(image)
        content.append(
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}
        )

    completion = await client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": content}],
    )
    return completion.choices[0].message.content

async def async_gemma_27b(prompt, image_path=None):
    """Asynchronously query ``google/gemma-3-27b-it:free`` on OpenRouter.

    Args:
        prompt: Text part of the single user turn.
        image_path: Optional path handed to ``Image.open``; when given, the
            image is base64-encoded and appended as an ``image/jpeg`` data URL.

    Returns:
        The message content of the first returned choice.
    """
    client = AsyncOpenAI(
        base_url="https://openrouter.ai/api/v1",
        api_key=os.getenv("OPENROUTER_API_KEY"),
    )

    # The text part is always present; the image part is optional.
    parts = [{"type": "text", "text": prompt}]
    if image_path is not None:
        encoded = encode_image_to_base64(Image.open(image_path))
        parts.append(
            {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{encoded}"}}
        )

    completion = await client.chat.completions.create(
        model="google/gemma-3-27b-it:free",
        messages=[{"role": "user", "content": parts}],
    )
    return completion.choices[0].message.content

async def async_gemma_3(prompt, image_path=None):
    """Asynchronously query ``google/gemma-3-27b-it`` on DeepInfra.

    Args:
        prompt: Prompt text for the single user turn.
        image_path: Optional path handed to ``Image.open``; when given, the
            image is base64-encoded and sent as an ``image/jpeg`` data URL.

    Returns:
        The message content of the first returned choice.
    """
    # OpenAI-compatible client pointed at the DeepInfra endpoint.
    client = AsyncOpenAI(
        api_key=os.getenv("DEEPINFRA_API_KEY"),
        base_url="https://api.deepinfra.com/v1/openai",
    )
    # Identity check: `== None` would defer to a custom __eq__ on the argument.
    if image_path is None:
        chat_completion = await client.chat.completions.create(
            model="google/gemma-3-27b-it",
            messages=[{"role": "user", "content": prompt}],
        )
        return chat_completion.choices[0].message.content

    image = Image.open(image_path)
    base64_image = encode_image_to_base64(image)
    image_message = {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}
    chat_completion = await client.chat.completions.create(
        model="google/gemma-3-27b-it",
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    image_message
                ]
            }
        ],
    )
    return chat_completion.choices[0].message.content

async def async_phi(prompt, image_path=None):
    """Asynchronously query ``microsoft/Phi-4-multimodal-instruct`` on DeepInfra.

    Args:
        prompt: Prompt text for the single user turn.
        image_path: Optional path handed to ``Image.open``; when given, the
            image is base64-encoded and sent as an ``image/jpeg`` data URL.

    Returns:
        The message content of the first returned choice.
    """
    # OpenAI-compatible client pointed at the DeepInfra endpoint.
    client = AsyncOpenAI(
        api_key=os.getenv("DEEPINFRA_API_KEY"),
        base_url="https://api.deepinfra.com/v1/openai",
    )
    # Identity check: `== None` would defer to a custom __eq__ on the argument.
    if image_path is None:
        chat_completion = await client.chat.completions.create(
            model="microsoft/Phi-4-multimodal-instruct",
            messages=[{"role": "user", "content": prompt}],
        )
        return chat_completion.choices[0].message.content

    image = Image.open(image_path)
    base64_image = encode_image_to_base64(image)
    image_message = {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}
    chat_completion = await client.chat.completions.create(
        model="microsoft/Phi-4-multimodal-instruct",
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    image_message
                ]
            }
        ],
    )
    return chat_completion.choices[0].message.content

async def async_qwen_2_5(prompt, image_path=None):
    """Asynchronously query ``qwen/qwen2.5-vl-72b-instruct`` on OpenRouter.

    Args:
        prompt: Text part of the single user turn.
        image_path: Optional path handed to ``Image.open``; when given, the
            image is base64-encoded and appended as an ``image/jpeg`` data URL.

    Returns:
        The message content of the first returned choice.
    """
    client = AsyncOpenAI(
        base_url="https://openrouter.ai/api/v1",
        api_key=os.getenv("OPENROUTER_API_KEY"),
    )

    text_part = {"type": "text", "text": prompt}
    if image_path is None:
        user_content = [text_part]
    else:
        picture = Image.open(image_path)
        b64 = encode_image_to_base64(picture)
        image_part = {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{b64}"}}
        user_content = [text_part, image_part]

    result = await client.chat.completions.create(
        model="qwen/qwen2.5-vl-72b-instruct",
        messages=[{"role": "user", "content": user_content}],
    )
    return result.choices[0].message.content

async def async_llava(prompt, image_path=None):
    """Asynchronously run the pinned LLaVA v1.6 (vicuna-13b) model on Replicate.

    Args:
        prompt: Prompt text sent to the model.
        image_path: Optional image path; it is converted with
            ``local_image_to_data_url`` and sent as the ``image`` input.

    Returns:
        All output fragments yielded by the run, concatenated into one string.
    """
    # Named model_input rather than `input` to avoid shadowing the builtin.
    if image_path is None:
        model_input = {"prompt": prompt}
    else:
        model_input = {
            "image": local_image_to_data_url(image_path),
            "prompt": prompt,
        }
    output = await replicate.async_run(
        "yorickvp/llava-v1.6-vicuna-13b:0603dec596080fa084e26f0ae6d605fc5788ed2b1a0358cd25010619487eae63",
        input=model_input
    )
    # Collect every streamed output fragment and join once, instead of
    # growing a string with repeated +=.
    return "".join([item async for item in output])

# response = intern_VL("what is in the image?","document/basic_understanding/extracted_images_cartoon/easy/1d3c2531-8515-46a6-b4a6-71440be09636.png")
# print(response)

async def main():
    """Smoke-test ``async_llava`` on one hard-coded sample image and print the reply."""
    image_path = "document/basic_understanding/extracted_images_cartoon/easy/1d3c2531-8515-46a6-b4a6-71440be09636.png"
    prompt = "what is in the image?"

    print(await async_llava(prompt, image_path))

if __name__ == "__main__":
    # asyncio.run() creates a fresh event loop, runs main() to completion and
    # closes the loop afterwards. Calling asyncio.get_event_loop() with no
    # running loop is deprecated in recent Python versions.
    asyncio.run(main())