
import base64
import io
import requests
import numpy as np
from PIL import Image
import re

# ----- Conversion Helpers -----
def image_to_data_uri(image_path: str) -> str:
    """Read an image file and return it as a base64 ``data:image/...`` URI.

    The MIME subtype is taken from the file extension (case-insensitive).
    Common extensions whose name differs from the registered subtype are
    mapped (``jpg`` -> ``jpeg``, ``tif`` -> ``tiff``, ``svg`` -> ``svg+xml``).
    When the file name has no extension the subtype falls back to ``jpeg``,
    the same default ``image_bytes_to_data_uri`` uses for raw bytes.

    Args:
        image_path: Path of the image file to encode.

    Returns:
        A string of the form ``data:image/<subtype>;base64,<payload>``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    import os  # local import so this helper does not depend on file-level imports

    with open(image_path, "rb") as f:
        encoded = base64.b64encode(f.read()).decode("utf-8")

    # splitext only looks at the last path component, so a dot in a directory
    # name (e.g. "runs/v1.2/frame") is never mistaken for an extension.
    ext = os.path.splitext(image_path)[1].lstrip(".").lower()
    subtype_aliases = {"jpg": "jpeg", "tif": "tiff", "svg": "svg+xml"}
    mime = subtype_aliases.get(ext, ext) or "jpeg"
    return f"data:image/{mime};base64,{encoded}"

def image_bytes_to_data_uri(image_bytes: bytes, ext: str = "jpeg") -> str:
    """Wrap already-encoded image bytes in a base64 ``data:image/...`` URI.

    The bytes are not inspected or re-encoded; ``ext`` alone decides the
    MIME subtype. It is normalised first: surrounding whitespace and a
    leading dot are removed, it is lower-cased, and ``jpg`` is mapped to the
    registered subtype ``jpeg``.

    Args:
        image_bytes: Contents of an encoded image file (PNG, JPEG, ...).
        ext: Image format name or file extension, e.g. ``"png"`` or ``".JPG"``.

    Returns:
        A string of the form ``data:image/<subtype>;base64,<payload>``.
    """
    subtype = ext.strip().lstrip(".").lower()
    if subtype == "jpg":
        # "image/jpg" is not a registered MIME type; "image/jpeg" is.
        subtype = "jpeg"
    encoded = base64.b64encode(image_bytes).decode("utf-8")
    return f"data:image/{subtype};base64,{encoded}"

def array_to_data_uri(img_array: np.ndarray, ext: str = "png") -> str:
    """Encode a NumPy image array and return it as a base64 data URI.

    The array is cast to ``uint8`` (values are not rescaled), turned into a
    PIL image, and saved in the format named by ``ext``.

    Args:
        img_array: Image data, H x W or H x W x C.
        ext: Target image format / extension, e.g. ``"png"``, ``"jpg"``.

    Returns:
        A string of the form ``data:image/<subtype>;base64,<payload>``.
    """
    fmt = ext.strip().lstrip(".").lower()
    if fmt == "jpg":
        # Pillow registers the encoder under "JPEG"; format="JPG" fails in save().
        fmt = "jpeg"

    img = Image.fromarray(img_array.astype('uint8'))
    if fmt == "jpeg" and img.mode in ("RGBA", "LA"):
        # JPEG has no alpha channel; drop it instead of failing in save().
        img = img.convert("RGB" if img.mode == "RGBA" else "L")

    buffer = io.BytesIO()
    img.save(buffer, format=fmt.upper())
    # Pass the normalised name so the MIME subtype is "jpeg", never "jpg".
    return image_bytes_to_data_uri(buffer.getvalue(), fmt)

def pil_to_data_uri(img: Image.Image, ext: str = "png") -> str:
    """Encode a PIL image in the given format and return it as a data URI.

    Args:
        img: The image to encode. It is not modified; a converted copy is
            used when the target format cannot store the image's mode.
        ext: Target image format / extension, e.g. ``"png"``, ``"jpg"``.

    Returns:
        A string of the form ``data:image/<subtype>;base64,<payload>``.
    """
    fmt = ext.strip().lstrip(".").lower()
    if fmt == "jpg":
        # Pillow registers the encoder under "JPEG"; format="JPG" fails in save().
        fmt = "jpeg"

    if fmt == "jpeg" and img.mode in ("RGBA", "LA", "P", "PA"):
        # JPEG cannot store alpha or palette modes; convert rather than fail.
        img = img.convert("RGB")

    buffer = io.BytesIO()
    img.save(buffer, format=fmt.upper())
    # Pass the normalised name so the MIME subtype is "jpeg", never "jpg".
    return image_bytes_to_data_uri(buffer.getvalue(), fmt)



def make_prompt_for_object(object_name: str) -> str:
    """Build the yes/no prompt asking whether an object is inside the red box.

    Args:
        object_name: Name of the object to look for; inserted in single quotes.

    Returns:
        The prompt as a single line of space-separated sentences.
    """
    sentences = [
        "Focus only on the red bounding box in the image.",
        "Ignore everything outside the box.",
        f"Determine if the object '{object_name}' is present inside the box.",
        "It is valid even if only part of the object appears in the box,"
        " as long as the main subject is included.",
        "Answer strictly with 'Yes' or 'No'.",
    ]
    return " ".join(sentences)



# ----- Send to OpenRouter VLM -----
def send_to_vlm_openrouter(
    data_uri: str,
    prompt: str,
    api_key: str,
    model: str,
    timeout: float = 120.0,
) -> str | None:
    """Send one text + image message to OpenRouter and return the reply text.

    Failures are reported with ``print`` and signalled by returning ``None``;
    no exception from the HTTP call or from response parsing is propagated.

    Args:
        data_uri: The image as a ``data:image/...;base64,...`` URI.
        prompt: The text part of the user message.
        api_key: OpenRouter API key, sent as a Bearer token.
        model: Model identifier placed in the request payload.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The ``content`` of the first choice's message, or ``None`` when the
        request fails or the response does not contain a usable choice.
    """
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
        # NOTE(review): this referer looks like a placeholder - confirm.
        "HTTP-Referer": "https://yourdomain.com",
        "X-Title": "VLMBatchAnalyzer"
    }

    payload = {
        "model": model,
        "messages": [{
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {"type": "image_url", "image_url": {"url": data_uri}}
            ]
        }]
    }

    try:
        response = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers=headers, json=payload, timeout=timeout
        )
        result = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        print("[!] Request exception:", e)
        return None

    # Error responses carry no (or an empty) "choices" list; report the body.
    choices = result.get("choices") if isinstance(result, dict) else None
    if not choices:
        print("[!] Error in OpenRouter response:", result)
        return None

    try:
        return choices[0]["message"]["content"]
    except (KeyError, IndexError, TypeError):
        print("[!] Error in OpenRouter response:", result)
        return None


# ----- Analyze Image -----
def analyze_image_for_object(
    image_input: str | bytes | np.ndarray | Image.Image,
    object_name: str,
    api_key: str,
    model_name: str,
    ext: str = "png"
) -> bool | None:
    """Ask the VLM whether ``object_name`` is inside the image's red box.

    Args:
        image_input: The image, as one of:
            - str: file path
            - bytes: raw (already encoded) image bytes
            - np.ndarray: HxWxC array
            - PIL.Image.Image
        object_name: Name of the object to look for.
        api_key: OpenRouter API key.
        model_name: Model identifier to query.
        ext: Image format used for bytes / array / PIL inputs. Ignored for
            file paths, where the extension of the path is used.

    Returns:
        ``True`` if the first yes/no word in the reply is affirmative,
        ``False`` if it is negative or the reply contains no yes/no word,
        ``None`` if the request itself failed.

    Raises:
        ValueError: If ``image_input`` is of an unsupported type.
    """
    if isinstance(image_input, str):
        data_uri = image_to_data_uri(image_input)
    elif isinstance(image_input, bytes):
        data_uri = image_bytes_to_data_uri(image_input, ext)
    elif isinstance(image_input, np.ndarray):
        data_uri = array_to_data_uri(image_input, ext)
    elif isinstance(image_input, Image.Image):
        data_uri = pil_to_data_uri(image_input, ext)
    else:
        raise ValueError("Unsupported image_input type")

    prompt = make_prompt_for_object(object_name)
    rtext = send_to_vlm_openrouter(data_uri, prompt, api_key, model_name)
    if rtext is None:
        return None

    # Match whole words only: a plain substring test for "y" would turn
    # replies such as "No, only the sky is visible" into a positive answer.
    verdict = re.search(r"\b(yes|y|true|no|n|false)\b", rtext.lower())
    if verdict is None:
        return False
    return verdict.group(1) in ("yes", "y", "true")


def make_prompt_for_view(object_name: str, orientation: str) -> str:
    """Build the prompt asking which numbered sub-image shows a given view.

    Args:
        object_name: Name of the red-boxed target object; inserted in quotes.
        orientation: Name of the requested view; inserted in quotes.

    Returns:
        The prompt as a single line of space-separated sentences.
    """
    sentences = [
        "In the provided image, there are multiple sub-images arranged in a grid,"
        " each surrounded by a blue border and labeled with a number at the top.",
        f"In each sub-image, the target object '{object_name}'"
        " is highlighted with a red bounding box.",
        "Ignore all other objects and details.",
        "Your task is to determine in which sub-image the red-boxed object is shown"
        f" from the '{orientation}' view.",
        "Only output the number written above that sub-image.",
        "Do not output anything else.",
    ]
    return " ".join(sentences)
    



def analyze_view_for_object(
    image_input: str | bytes | np.ndarray | Image.Image,
    object_name: str,
    orientation: str,
    api_key: str,
    model_name: str,
    ext: str = "png"
) -> int | None:
    """Ask the VLM which numbered sub-image shows the object from a given view.

    Args:
        image_input: The grid image, as one of:
            - str: file path
            - bytes: raw (already encoded) image bytes
            - np.ndarray: HxWxC array
            - PIL.Image.Image
        object_name: Name of the red-boxed target object.
        orientation: Name of the requested view, inserted into the prompt.
        api_key: OpenRouter API key.
        model_name: Model identifier to query.
        ext: Image format used for bytes / array / PIL inputs. Ignored for
            file paths, where the extension of the path is used.

    Returns:
        The first integer found in the model's reply, or ``None`` when the
        request failed or the reply contains no number.

    Raises:
        ValueError: If ``image_input`` is of an unsupported type.
    """
    if isinstance(image_input, str):
        data_uri = image_to_data_uri(image_input)
    elif isinstance(image_input, bytes):
        data_uri = image_bytes_to_data_uri(image_input, ext)
    elif isinstance(image_input, np.ndarray):
        data_uri = array_to_data_uri(image_input, ext)
    elif isinstance(image_input, Image.Image):
        data_uri = pil_to_data_uri(image_input, ext)
    else:
        raise ValueError("Unsupported image_input type")

    prompt = make_prompt_for_view(object_name, orientation)
    rtext = send_to_vlm_openrouter(data_uri, prompt, api_key, model_name)
    if rtext is None:
        return None

    # The model may wrap the number in words ("Sub-image 3"); take the first
    # run of digits. A reply without digits yields None instead of crashing
    # on int(None).
    match = re.search(r"\d+", rtext)
    return int(match.group(0)) if match else None







