{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import base64\n",
    "import os\n",
    "import pickle as pkl\n",
    "import re\n",
    "\n",
    "import imageio.v3 as iio\n",
    "from IPython.display import display\n",
    "from openai import OpenAI\n",
    "from PIL import Image"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Initialize the OpenAI client.\n",
    "# The key is read from the OPENAI_API_KEY environment variable so that no secret\n",
    "# is ever typed into (or committed with) the notebook.\n",
    "api_key = os.environ.get(\"OPENAI_API_KEY\")\n",
    "\n",
    "# With api_key=None the client resolves OPENAI_API_KEY itself and raises if it is unset,\n",
    "# so a missing key fails here instead of on the first request.\n",
    "client = OpenAI(api_key=api_key)\n",
    "\n",
    "def encode_image(image_path):\n",
    "    \"\"\"Read the file at ``image_path`` and return its bytes as base64 text (``str``).\"\"\"\n",
    "    with open(image_path, \"rb\") as image_file:\n",
    "        raw_bytes = image_file.read()\n",
    "    encoded_bytes = base64.b64encode(raw_bytes)\n",
    "    return encoded_bytes.decode(\"utf-8\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_task_specific(task_name):\n",
    "    \"\"\"Look up the extra prompt rule that applies to a task suite.\n",
    "\n",
    "    Returns the rule sentence for \"object\"/\"spatial\", \"goal\" or \"10\"; any other\n",
    "    ``task_name`` raises ``ValueError``.\n",
    "    \"\"\"\n",
    "    rules_by_suite = (\n",
    "        ((\"object\", \"spatial\"), \"The first word of the instruction should be `PICK', and then it should also include the word `PLACE'.\"),\n",
    "        ((\"goal\",), \"The first word of the instruction should remain the same.\"),\n",
    "        ((\"10\",), \"Words like `PUT' and `PICK' should remain the same.\"),\n",
    "    )\n",
    "    for suite_names, rule in rules_by_suite:\n",
    "        if task_name in suite_names:\n",
    "            return rule\n",
    "    raise ValueError(f\"No task-specific rules defined for {task_name}\")\n",
    "\n",
    "def produce_alternate_prompts(task_name, version_number):\n",
    "    \"\"\"\n",
    "    Generate 20 rephrasings of every task instruction in a suite and pickle them.\n",
    "\n",
    "    For each of the 10 task indices the first frame of the task's rollout video is\n",
    "    saved as a PNG, sent to the chat model together with the rephrasing prompt, and\n",
    "    the text between `[instruction]` and `[/instruction]` tags in the reply is collected.\n",
    "\n",
    "    version_number = 1  => Base prompt only\n",
    "    version_number = 2  => Adds the task-specific bullet point from get_task_specific\n",
    "    version_number = 3  => Adds the task-specific bullet point and the\n",
    "                           \"Make the changes as minor as possible\" bullet point\n",
    "\n",
    "    Args:\n",
    "        task_name: Suite name used in the description, video and output file names.\n",
    "        version_number: Prompt variant to use (1, 2, or 3).\n",
    "\n",
    "    Returns:\n",
    "        None. A dict mapping task index -> list of 20 instruction strings is pickled to\n",
    "        ../data/{task_name}_alternate_instructions.pkl (version 1) or the same name with\n",
    "        a _2 / _3 suffix (versions 2 / 3).\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If version_number is not 1, 2, or 3, or if get_task_specific has no\n",
    "            rule for task_name (versions 2 and 3 only).\n",
    "        AssertionError: If the model reply contains fewer than 20 tagged instructions.\n",
    "    \"\"\"\n",
    "    # Choose the output file path based on version_number\n",
    "    if version_number == 1:\n",
    "        output_path = f\"../data/{task_name}_alternate_instructions.pkl\"\n",
    "    elif version_number == 2:\n",
    "        output_path = f\"../data/{task_name}_alternate_instructions_2.pkl\"\n",
    "    elif version_number == 3:\n",
    "        output_path = f\"../data/{task_name}_alternate_instructions_3.pkl\"\n",
    "    else:\n",
    "        raise ValueError(\"version_number must be 1, 2, or 3.\")\n",
    "\n",
    "    # If version_number requires task-specific instructions, retrieve them\n",
    "    if version_number in [2, 3]:\n",
    "        task_specific = get_task_specific(task_name)\n",
    "        task_specific_line = f\"- {task_specific}\"\n",
    "    else:\n",
    "        task_specific_line = \"\"\n",
    "\n",
    "    # If version_number=3, add the additional 'minor changes' bullet\n",
    "    if version_number == 3:\n",
    "        minor_changes_line = \"- Make the changes as minor as possible, as the robot's language system is not very robust to rephrasing.\"\n",
    "    else:\n",
    "        minor_changes_line = \"\"\n",
    "\n",
    "    # Load the existing task descriptions.\n",
    "    # NOTE: unpickling can execute arbitrary code; only load description files you trust.\n",
    "    with open(f\"../data/{task_name}_descriptions.pkl\", \"rb\") as f:\n",
    "        task_descriptions = pkl.load(f)\n",
    "\n",
    "    # Container for all instructions\n",
    "    all_instructions = {}\n",
    "\n",
    "    # Loop over tasks\n",
    "    for task_idx in range(10):\n",
    "        task_description = task_descriptions[task_idx]\n",
    "        print(\"TASK:\", task_description, \"| IDX:\", task_idx)\n",
    "\n",
    "        # Process description for the video file name\n",
    "        processed_task_description = (\n",
    "            task_description.lower()\n",
    "            .replace(\" \", \"_\")\n",
    "            .replace(\"\\n\", \"_\")\n",
    "            .replace(\".\", \"_\")[:50]\n",
    "        )\n",
    "\n",
    "        video_path = (\n",
    "            f\"../openvla/rollouts/\"\n",
    "            f\"libero_{task_name}--{task_idx}--episode=1--task={processed_task_description}.mp4\"\n",
    "        )\n",
    "\n",
    "        # Read only the first frame from the video\n",
    "        frame = iio.imread(video_path, index=0)\n",
    "        image = Image.fromarray(frame)\n",
    "        image_path = f\"../data/image_grabs/{task_name}_{task_idx}_first_frame.png\"\n",
    "        image.save(image_path)\n",
    "        display(image)\n",
    "\n",
    "        # Build the prompt text\n",
    "        prompt_text = f\"\"\"You are generating alternative phrasings of a robotic task instruction while preserving its exact meaning.\n",
    "\n",
    "### Task Instruction:\n",
    "'{task_description}'\n",
    "\n",
    "### Instructions:\n",
    "- Generate **20** alternative ways to phrase the task instruction.\n",
    "- Keep each instruction **concise and unambiguous**.\n",
    "- Ensure the instructions remain suitable for a **robot, not a human**.\n",
    "- Only make **semantically meaningless** changes (e.g., word order, synonyms, slight rewording).\n",
    "- Double-check that the new instructions mean the same exact thing for the robot; do not just substitute synonyms without considering context.\n",
    "- Do **not** introduce additional steps, remove essential details, or alter the action.\n",
    "{minor_changes_line}\n",
    "{task_specific_line}\n",
    "\n",
    "### Output Format:\n",
    "Each rephrased instruction should be wrapped in `[instruction]` and `[/instruction]` tags, like this:\n",
    "[instruction] Rephrased instruction 1 [/instruction] \n",
    "[instruction] Rephrased instruction 2 [/instruction]\n",
    "\"\"\"\n",
    "\n",
    "        print(prompt_text)\n",
    "\n",
    "        # Base64-encode the saved frame so it can be embedded in a data URL\n",
    "        base64_image = encode_image(image_path)\n",
    "\n",
    "        # Send the prompt and the frame to the model in a single user message\n",
    "        response = client.chat.completions.create(\n",
    "            model=\"gpt-4o-mini\",\n",
    "            messages=[\n",
    "                {\n",
    "                    \"role\": \"user\",\n",
    "                    \"content\": [\n",
    "                        {\"type\": \"text\", \"text\": prompt_text},\n",
    "                        # The frame was saved as PNG above, so the data URL declares image/png.\n",
    "                        {\"type\": \"image_url\", \"image_url\": {\"url\": f\"data:image/png;base64,{base64_image}\"}}\n",
    "                    ],\n",
    "                }\n",
    "            ],\n",
    "        )\n",
    "\n",
    "        # Retrieve the model output; content can be None, which re.findall cannot parse\n",
    "        output_text = response.choices[0].message.content or \"\"\n",
    "        print(response.choices[0])\n",
    "\n",
    "        # Extract text between [instruction] and [/instruction]\n",
    "        instructions = re.findall(r\"\\[instruction\\](.*?)\\[/instruction\\]\", output_text, re.DOTALL)\n",
    "\n",
    "        # Clean up each extracted instruction\n",
    "        instructions = [instr.strip() for instr in instructions]\n",
    "\n",
    "        # Print them out for reference\n",
    "        for i, instruction in enumerate(instructions, 1):\n",
    "            print(f\"{i}: {instruction}\")\n",
    "        print(\"-\"*20)\n",
    "\n",
    "        # Ensure we have at least 20 before storing anything for this task\n",
    "        assert len(instructions) >= 20, (\n",
    "            f\"Expected at least 20 rephrasings for task {task_idx}, got {len(instructions)}\"\n",
    "        )\n",
    "        # Store exactly 20 instructions\n",
    "        all_instructions[task_idx] = instructions[:20]\n",
    "\n",
    "    # Save the instructions to a file\n",
    "    with open(output_path, \"wb\") as f:\n",
    "        pkl.dump(all_instructions, f)\n",
    "    print(f\"Saved results to {output_path}\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_task_specific(task_name):\n",
    "    \"\"\"Return the suite-specific wording rule that is appended to the rephrasing prompt.\n",
    "\n",
    "    NOTE(review): an earlier cell defines a function with this same name; when the\n",
    "    cells run in order this definition is the one that stays bound.\n",
    "\n",
    "    Args:\n",
    "        task_name: Suite identifier: \"object\", \"spatial\", \"goal\" or \"10\".\n",
    "\n",
    "    Returns:\n",
    "        The rule as a single sentence (str).\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If no rule is defined for ``task_name``.\n",
    "    \"\"\"\n",
    "    if task_name in [\"object\", \"spatial\"]:\n",
    "        return \"The first word of the instruction should be `PICK', and then it should also include the word `PLACE'.\"\n",
    "    elif task_name == \"goal\":\n",
    "        return \"The first word of the instruction should remain the same.\"\n",
    "    elif task_name == \"10\":\n",
    "        return \"Words like `PUT' and `PICK' should remain the same.\"\n",
    "    else:\n",
    "        # Name the offending suite so the failure is diagnosable from the traceback alone.\n",
    "        raise ValueError(f\"No task-specific rules defined for {task_name}\")\n",
    "    \n",
    "\n",
    "def _build_rephrase_prompt(task_description, task_specific=None, minor_changes=False):\n",
    "    \"\"\"Assemble the rephrasing prompt for one task instruction.\n",
    "\n",
    "    Args:\n",
    "        task_description: The original instruction, quoted verbatim in the prompt.\n",
    "        task_specific: Optional rule sentence, added as the last bullet when not None.\n",
    "        minor_changes: When True, the \"minor changes\" bullet is added as the second bullet.\n",
    "\n",
    "    Returns:\n",
    "        The full prompt text (str).\n",
    "    \"\"\"\n",
    "    # Every prompt line after the first is indented by 8 spaces; this is kept so the\n",
    "    # prompt text is unchanged from the inline templates this helper replaces.\n",
    "    pad = \" \" * 8\n",
    "\n",
    "    bullets = [\"- Generate **20** alternative ways to phrase the task instruction.\"]\n",
    "    if minor_changes:\n",
    "        bullets.append(\"- Make the changes as minor as possible, as the robot's language system is not very robust to rephrasing.\")\n",
    "    bullets.extend([\n",
    "        \"- Keep each instruction **concise and unambiguous**.\",\n",
    "        \"- Ensure the instructions remain suitable for a **robot, not a human**.\",\n",
    "        \"- Only make **semantically meaningless** changes (e.g., word order, synonyms, slight rewording).\",\n",
    "        \"- Double-check that the new instructions mean the same exact thing for the robot, do not just substitute synonyms without considering context.\",\n",
    "        \"- Do **not** introduce additional steps, remove essential details, or alter the action.\",\n",
    "    ])\n",
    "    if task_specific is not None:\n",
    "        bullets.append(f\"- {task_specific}\")\n",
    "    bullet_block = \"\\n\".join(pad + bullet for bullet in bullets)\n",
    "\n",
    "    return f\"\"\"You are generating alternative phrasings of a robotic task instruction while preserving its exact meaning. \n",
    "\n",
    "        ### Task Instruction:\n",
    "        '{task_description}'\n",
    "\n",
    "        ### Instructions:\n",
    "{bullet_block}\n",
    "\n",
    "        ### Output Format:\n",
    "        Each rephrased instruction should be wrapped in `[instruction]` and `[/instruction]` tags, like this:\n",
    "        [instruction] Rephrased instruction 1 [/instruction] \n",
    "        [instruction] Rephrased instruction 2 [/instruction] \n",
    "        \"\"\"\n",
    "\n",
    "\n",
    "def _generate_alternate_instructions(task_name, output_suffix, use_task_specific=False, minor_changes=False):\n",
    "    \"\"\"Shared worker behind the three produce_alternate_prompts* entry points.\n",
    "\n",
    "    For each of the 10 task indices: grab the first frame of the rollout video, save it\n",
    "    as a PNG, send it with the prompt to the chat model, and collect the text between\n",
    "    `[instruction]` tags. The result (task index -> list of 20 strings) is pickled to\n",
    "    ../data/{task_name}_alternate_instructions{output_suffix}.pkl.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: From get_task_specific when use_task_specific is True and the suite\n",
    "            has no rule.\n",
    "        AssertionError: If the model reply contains fewer than 20 tagged instructions.\n",
    "    \"\"\"\n",
    "    # NOTE: unpickling can execute arbitrary code; only load description files you trust.\n",
    "    with open(f\"../data/{task_name}_descriptions.pkl\", \"rb\") as f:\n",
    "        task_descriptions = pkl.load(f)\n",
    "\n",
    "    all_instructions = dict()\n",
    "    task_specific = get_task_specific(task_name) if use_task_specific else None\n",
    "\n",
    "    for task_idx in range(10):\n",
    "\n",
    "        task_description = task_descriptions[task_idx]\n",
    "        print(\"TASK:\", task_description, \"| IDX:\", task_idx)\n",
    "\n",
    "        # Read the first frame\n",
    "        processed_task_description = task_description.lower().replace(\" \", \"_\").replace(\"\\n\", \"_\").replace(\".\", \"_\")[:50]\n",
    "        video_path = f\"../openvla/rollouts/libero_{task_name}--{task_idx}--episode=1--task={processed_task_description}.mp4\"\n",
    "        frame = iio.imread(video_path, index=0)  # Read only the first frame\n",
    "\n",
    "        # Convert to a PIL image and save as PNG\n",
    "        image = Image.fromarray(frame)\n",
    "        image_path = f\"../data/image_grabs/{task_name}_{task_idx}_first_frame.png\"\n",
    "        image.save(image_path)\n",
    "        display(image)\n",
    "\n",
    "        prompt_text = _build_rephrase_prompt(task_description, task_specific, minor_changes)\n",
    "        print(prompt_text)\n",
    "\n",
    "        # Getting the Base64 string\n",
    "        base64_image = encode_image(image_path)\n",
    "\n",
    "        response = client.chat.completions.create(\n",
    "            model=\"gpt-4o-mini\",\n",
    "            messages=[\n",
    "                {\n",
    "                    \"role\": \"user\",\n",
    "                    \"content\": [\n",
    "                        {\n",
    "                            \"type\": \"text\",\n",
    "                            \"text\": prompt_text,\n",
    "                        },\n",
    "                        {\n",
    "                            \"type\": \"image_url\",\n",
    "                            # The frame was saved as PNG above, so the data URL declares image/png.\n",
    "                            \"image_url\": {\"url\": f\"data:image/png;base64,{base64_image}\"},\n",
    "                        },\n",
    "                    ],\n",
    "                }\n",
    "            ],\n",
    "        )\n",
    "\n",
    "        print(response.choices[0])\n",
    "\n",
    "        # content can be None, which re.findall cannot parse\n",
    "        output_text = response.choices[0].message.content or \"\"\n",
    "\n",
    "        # Extract text between [instruction] and [/instruction] using regex\n",
    "        instructions = re.findall(r\"\\[instruction\\](.*?)\\[/instruction\\]\", output_text, re.DOTALL)\n",
    "\n",
    "        # Remove any extra spaces\n",
    "        instructions = [instr.strip() for instr in instructions]\n",
    "\n",
    "        # Print the parsed list\n",
    "        for i, instruction in enumerate(instructions, 1):\n",
    "            print(f\"{i}: {instruction}\")\n",
    "\n",
    "        print(\"-\"*20)\n",
    "\n",
    "        # Check the count before storing so a short reply is reported with its task index\n",
    "        assert len(instructions) >= 20, (\n",
    "            f\"Expected at least 20 rephrasings for task {task_idx}, got {len(instructions)}\"\n",
    "        )\n",
    "        all_instructions[task_idx] = instructions[:20]\n",
    "\n",
    "    with open(f\"../data/{task_name}_alternate_instructions{output_suffix}.pkl\", \"wb\") as f:\n",
    "        pkl.dump(all_instructions, f)\n",
    "\n",
    "\n",
    "def produce_alternate_prompts(task_name):\n",
    "    \"\"\"Generate rephrasings with the base prompt; writes ../data/{task_name}_alternate_instructions.pkl.\n",
    "\n",
    "    NOTE(review): an earlier cell defines produce_alternate_prompts(task_name, version_number);\n",
    "    when the cells run in order this one-argument definition is the one that stays bound.\n",
    "    \"\"\"\n",
    "    _generate_alternate_instructions(task_name, \"\")\n",
    "\n",
    "\n",
    "def produce_alternate_prompts_2(task_name):\n",
    "    \"\"\"Generate rephrasings with the task-specific rule added; writes ..._alternate_instructions_2.pkl.\"\"\"\n",
    "    _generate_alternate_instructions(task_name, \"_2\", use_task_specific=True)\n",
    "\n",
    "\n",
    "def produce_alternate_prompts_3(task_name):\n",
    "    \"\"\"Generate rephrasings with the task-specific and minor-changes rules; writes ..._alternate_instructions_3.pkl.\"\"\"\n",
    "    _generate_alternate_instructions(task_name, \"_3\", use_task_specific=True, minor_changes=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run all three prompt variants for every suite, one suite at a time.\n",
    "prompt_generators = (\n",
    "    produce_alternate_prompts,\n",
    "    produce_alternate_prompts_2,\n",
    "    produce_alternate_prompts_3,\n",
    ")\n",
    "\n",
    "for suite in (\"spatial\", \"object\", \"goal\"):\n",
    "    for generate in prompt_generators:\n",
    "        generate(suite)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
