import base64
import json
import mimetypes
import os
import traceback

from openai import OpenAI
from PIL import Image
from tqdm import tqdm


def encode_image(image_path):
    """Return the raw bytes of the file at ``image_path`` as base64 text.

    Args:
        image_path: Path of the file to read (opened in binary mode).

    Returns:
        The file contents, base64-encoded and decoded to a UTF-8 ``str``.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode('utf-8')

def encode_gif(image_path, max_frames=8):
    """Encode the leading frames of a multi-frame image as base64 JPEGs.

    Each frame is converted to RGB, saved as a JPEG in the
    ``feedback_fewshot/.cache/`` directory and read back as base64 text.
    Every frame reuses the same scratch file name, so only the last encoded
    frame remains on disk afterwards.

    Args:
        image_path: Path to the GIF (or other image Pillow can open).
        max_frames: Maximum number of frames to encode. Defaults to 8, the
            value that used to be hard-coded.

    Returns:
        A list of base64 strings, one per encoded frame, in frame order. The
        list is shorter than ``max_frames`` when the image has fewer frames.
    """
    cache_dir = "feedback_fewshot/.cache/"
    # The scratch directory may not exist on a fresh checkout.
    os.makedirs(cache_dir, exist_ok=True)
    image_file = cache_dir + os.path.basename(image_path) + "-" + ".jpg"

    encoded = []
    # The context manager closes the underlying file handle when done.
    with Image.open(image_path) as frame:
        for index in range(max_frames):
            try:
                frame.seek(index)
            except EOFError:
                # Fewer than max_frames frames: return what is available
                # instead of propagating the error to the caller.
                break
            frame.convert('RGB').save(image_file)
            with open(image_file, "rb") as image:
                encoded.append(base64.b64encode(image.read()).decode('utf-8'))

    return encoded

def encode_gif_2(image_path):
    """Encode every frame of a multi-frame image as a base64 JPEG string.

    Each frame is converted to RGB and written to
    ``feedback_fewshot/.cache/<basename>-<index>.jpg`` before being read back
    and base64-encoded, so one cached JPEG per frame is left on disk.

    Args:
        image_path: Path to the GIF (or other image Pillow can open).

    Returns:
        A list of base64 strings, one per frame, in frame order.
    """
    # Ensure the directory exists
    cache_dir = "feedback_fewshot/.cache/"
    os.makedirs(cache_dir, exist_ok=True)
    stem = os.path.basename(image_path)

    encoded = []
    # The context manager closes the underlying file handle when done.
    with Image.open(image_path) as frame:
        nframes = 0
        while True:
            image_file = os.path.join(cache_dir, stem + "-" + str(nframes) + ".jpg")
            frame.convert('RGB').save(image_file)
            with open(image_file, "rb") as image:
                encoded.append(base64.b64encode(image.read()).decode('utf-8'))
            nframes += 1
            try:
                frame.seek(nframes)
            except EOFError:
                break  # No more frames in the image
    return encoded

def chat_with_openai_rf(image_paths, gif_paths, text_prompts, max_attempts=3):
    """Ask gpt-4o to accept or reject each generated video for its task.

    For every (conditioning image, GIF, prompt) triple the conditioning image
    and the first eight GIF frames are sent to the chat completions endpoint
    together with the reviewer instructions, and the model's reply is
    collected.

    Args:
        image_paths: Conditioning image paths, one per example.
        gif_paths: GIF paths, one per example.
        text_prompts: Task descriptions, one per example.
        max_attempts: How many times the API request is attempted per example
            before giving up. Defaults to 3.

    Returns:
        A list aligned with the zipped inputs. Each entry is the model's
        reply text, or ``None`` when any input of the example is ``None``,
        when the inputs could not be encoded, when the GIF has fewer than
        eight frames, or when every request attempt failed.
    """
    # Prefer the standard OPENAI_API_KEY environment variable so a real key
    # never has to be written into the source; fall back to the placeholder.
    api_key = os.environ.get("OPENAI_API_KEY", 'YOUR_API_KEY')
    frame_count = 8
    print("Feedback started")

    # Reviewer instructions that open the conversation.
    # NOTE(review): these are sent with role "assistant", not "system" --
    # confirm that is intended.
    # NOTE(review): the text speaks of seven frames while eight are uploaded;
    # left untouched because changing the prompt changes model behaviour.
    messages_c = [
        {
            "role": "assistant",
            "content": [{
                            "type": "text",
                            "text": """Task: You are a video reviewer evaluating a sequence of actions presented as seven consecutive image uploads, which together represent a single video. You are going to accept the video if it completes the task and the video is consistent without glitches.
                       
                                    Query/Inference Phase:
                                        Inputs Provided:

                                            Textual Prompt: Describes the task the video should accomplish.
                                            Conditioning Image: Sets the fixed aspects of the scene.
                                            Sequence of Images (7 Frames): Represents consecutive moments in the video to be evaluated.

                                        Evaluation Process:

                                            View and Analyze Each Frame: Examine each of the seven images in sequence to understand the progression and continuity of actions.
                                            Assess Overall Coherence: Consider the sequence as a continuous scene to determine if the actions smoothly transition from one image to the next, maintaining logical progression.
                                            Check for Physical Accuracy: Ensure each frame adheres to the laws of physics, looking for any discrepancies in movement or positioning.
                                            Verify Task Completion: Check if the sequence collectively accomplishes the task described in the textual prompt.
                                            Identify Inconsistencies: Look for inconsistencies in object movement or overlaps that do not match the fixed scene elements shown in the conditioning image.

                                        Evaluation Criteria:

                                            Accept the sequence if it is as a coherent video which completes the task.
                                            Reject the sequence if any frame fails to meet the criteria, showing inconsistencies or not achieving the task. Reject even if there are the slightest errors. Be very strict.
                                            

                                        Response Requirement:

                                            Provide a single-word answer: Accept or Reject. Do not give reasoning.

                                        Additional Notes:

                                            You cannot request further clarification.
                                            The elements from the conditioning image must match those in each frame of the sequence."""
                            },
            ]
        }
    ]
    responses = []
    # Created on first use so that batches consisting only of skipped
    # examples never need to construct a client.
    client = None
    print("Querying started")

    # Loop through each example provided
    for image_path, gif_path, text_prompt in zip(image_paths, gif_paths, text_prompts):
        if image_path is None or gif_path is None or text_prompt is None:
            responses.append(None)
            continue

        # Encoding failures are deterministic, so they are handled once here
        # rather than being retried together with the network request.
        try:
            base_image = encode_image(image_path)
            frames = encode_gif(gif_path)[:frame_count]
            # Label the data URL with the file's real type; keep the old
            # "image/gif" label when the extension is not recognised.
            mime_type = mimetypes.guess_type(image_path)[0] or "image/gif"
            closing_text = (
                "the conditioning image is the first upload, the next seven uploads are the key frames of the gif and the textual prompt is:"
                + text_prompt
                + ". Return only the final decision"
            )
        except Exception:
            print("Could not prepare inputs, skipping example")
            traceback.print_exc()
            responses.append(None)
            continue

        if len(frames) < frame_count:
            print(f"Expected {frame_count} key frames in {gif_path!r}, got {len(frames)}; skipping example")
            responses.append(None)
            continue

        messages_q = [
            {
                "role": "user",
                # The API takes a string or a list of parts here, never a
                # bare dict.
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:{mime_type};base64,{base_image}"},
                    },
                ],
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{frame}"},
                    }
                    for frame in frames
                ],
            },
            {"role": "user", "content": closing_text},
        ]

        feedback = None
        for attempt in range(1, max_attempts + 1):
            try:
                if client is None:
                    client = OpenAI(api_key=api_key)
                response = client.chat.completions.create(model="gpt-4o", messages=messages_c + messages_q)
                feedback = response.choices[0].message.content
            except Exception:
                # Per-example boundary: log the failure and keep the batch
                # going; KeyboardInterrupt/SystemExit still propagate.
                traceback.print_exc()
                if attempt < max_attempts:
                    print("Bad Request, retrying")
                else:
                    print("Request failed, giving up on this example")
            else:
                print("Feedback received:", feedback)
                break

        # Always append exactly one entry so results stay aligned with inputs.
        responses.append(feedback)

    print("Print feedback generated successfully")
    return responses

def chat_with_openai_ithor(image_paths, gif_paths, text_prompts, actions_list, max_attempts=3):
    """Ask gpt-4o whether each navigation video reaches its goal.

    For every (conditioning image, GIF, prompt, actions) tuple the
    conditioning image, the first eight GIF frames and the comma-separated
    action list are sent to the chat completions endpoint together with the
    reviewer instructions, and the model's reply is collected.

    Args:
        image_paths: Conditioning image paths, one per example.
        gif_paths: GIF paths, one per example.
        text_prompts: Goal descriptions, one per example.
        actions_list: One iterable of action-name strings per example.
        max_attempts: How many times the API request is attempted per example
            before giving up. Defaults to 3.

    Returns:
        A list aligned with the zipped inputs. Each entry is the model's
        reply text, or ``None`` when the image path, GIF path or prompt is
        ``None``, when the inputs could not be prepared, when the GIF has
        fewer than eight frames, or when every request attempt failed.
    """
    # Prefer the standard OPENAI_API_KEY environment variable so a real key
    # never has to be written into the source; fall back to the placeholder.
    api_key = os.environ.get("OPENAI_API_KEY", 'YOUR_API_KEY')
    frame_count = 8
    print("Feedback started")

    # Reviewer instructions that open the conversation.
    # NOTE(review): these are sent with role "assistant", not "system" --
    # confirm that is intended.
    messages_c = [
        {
            "role": "assistant",
            "content": [{
                            "type": "text",
                            "text": """

                                        You are a video reviewer evaluating a sequence of actions presented as eight consecutive image uploads, which together represent a single video. You are also given a textual prompt representing the final object or location the agent has to navigate to. Additionally, you are provided with a list of actions extracted from the video.

                                        Your goal is to determine whether the video reaches the final goal by following the extracted actions.

                                        Inputs Provided:

                                            Textual Prompt: A word representing the final location of the agent.
                                            Conditioning Image: An image that sets the fixed aspects of the scene.
                                            Sequence of Images (8 Frames): Consecutive images representing moments in the video to be evaluated.
                                            Sequence of Actions: A list representing the actions taken. Each action is one of the following: ["MoveAhead", "RotateRight", "RotateLeft", "Done"].

                                        Evaluation Process:

                                            Analyze Each Frame in Context:
                                                Examine each of the eight images in sequence, considering the corresponding action from the action list.
                                                This is just to provide you an idea of how the trajectory progresses.

                                            Verify Task Completion:
                                                Determine if the final goal specified in the textual prompt is reached by the end of the sequence.
                                                Focus on whether the final frame shows the agent near the final location or the object.

                                        Evaluation Criteria:

                                            Accept the sequence if it reaches the final goal. As long as the said object is in the final frame you should accept the sequence. Reaching the final goal in the sole metric for evaluation.
                                            Reject the sequence if it does not reach the final goal. 

                                        Response Requirement:

                                            Provide a single-word answer: Accept or Reject. Do not give any reasoning.

                                        Additional Notes:

                                            You cannot request further clarification.

                                        I will now give you the inputs"""
                            },
            ]
        }
    ]
    responses = []
    # Created on first use so that batches consisting only of skipped
    # examples never need to construct a client.
    client = None
    print("Querying started")

    # Loop through each example provided
    for image_path, gif_path, text_prompt, actions in zip(image_paths, gif_paths, text_prompts, actions_list):
        if image_path is None or gif_path is None or text_prompt is None:
            responses.append(None)
            continue

        # Input preparation failures are deterministic, so they are handled
        # once here rather than together with the network request.
        try:
            base_image = encode_image(image_path)
            # encode_gif_2 returns every frame; only the first eight are sent.
            frames = encode_gif_2(gif_path)[:frame_count]
            actions_out = ", ".join(actions)
            # Label the data URL with the file's real type; keep the old
            # "image/gif" label when the extension is not recognised.
            mime_type = mimetypes.guess_type(image_path)[0] or "image/gif"
            closing_text = (
                "the conditioning image is the first upload, the next eight uploads are the key frames of the gif and the textual prompt is:"
                + text_prompt
                + ". The actions are: "
                + actions_out
                + ". Return only the final decision"
            )
        except Exception:
            print("Could not prepare inputs, skipping example")
            traceback.print_exc()
            responses.append(None)
            continue

        if len(frames) < frame_count:
            print(f"Expected {frame_count} key frames in {gif_path!r}, got {len(frames)}; skipping example")
            responses.append(None)
            continue

        print(actions_out)
        messages_q = [
            {
                "role": "user",
                # The API takes a string or a list of parts here, never a
                # bare dict.
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:{mime_type};base64,{base_image}"},
                    },
                ],
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{frame}"},
                    }
                    for frame in frames
                ],
            },
            {"role": "user", "content": closing_text},
        ]

        feedback = None
        for attempt in range(1, max_attempts + 1):
            try:
                if client is None:
                    client = OpenAI(api_key=api_key)
                response = client.chat.completions.create(model="gpt-4o", messages=messages_c + messages_q)
                feedback = response.choices[0].message.content
            except Exception:
                # Per-example boundary: log the failure and keep the batch
                # going; KeyboardInterrupt/SystemExit still propagate.
                traceback.print_exc()
                if attempt < max_attempts:
                    print("Bad Request, retrying")
                else:
                    print("Request failed, giving up on this example")
            else:
                print("Feedback received:", feedback)
                break

        # Always append exactly one entry so results stay aligned with inputs
        # (previously a failed example appended nothing).
        responses.append(feedback)

    print("Print feedback generated successfully")
    return responses

if __name__ == "__main__":
    # Example inputs: one conditioning image, one generated GIF and its task.
    image_paths = ['feedback_fewshot/assembly_24_test.png']
    gif_paths = ['feedback_fewshot/assembly_checkpoint_276_out_3.gif']
    text_prompts = ['assembly']

    # The module defines no `chat_with_openai`; the reviewer that takes these
    # three argument lists is `chat_with_openai_rf`.
    feedback_responses = chat_with_openai_rf(image_paths, gif_paths, text_prompts)
    print("All responses:", feedback_responses)