{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "3fMcenTtS3sO"
      },
      "outputs": [],
      "source": [
        "import numpy as np\n",
        "import pandas as pd\n",
        "from tqdm.auto import tqdm\n",
        "import time\n",
        "import random\n",
        "import concurrent.futures\n",
        "\n",
        "import openai"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "EC0-hLT1S5qm"
      },
      "outputs": [],
      "source": [
        "template_meta_prompt = \"\"\"\n",
        "To achieve a more effective TASK description and INSTRUCTION and convey its core essence more clearly, please enhance the content in the quote by rephrasing and changing some information: \"{}\"\n",
        "\n",
        "Please return directly the modified description without additional description and quotes.\n",
        "The modified description:\n",
        "\"\"\"\n",
        "\n",
        "# z1_4_lin = \"\"\"Now you will help me minimize a function with two input variables, w and b. I have some (w, b) pairs and the function values at those points. The pairs are arranged in descending order based on their function values, where smaller function values indicate better results. Therefore, although the pairs are listed from the highest function value to the lowest, the pair with the smallest function value is considered the most optimal.\n",
        "# \"\"\"\n",
        "# z2_4_lin=\"\"\"Give me a new (w, b) pair that is different from all pairs above, and has a function value lower than any of the above. Do not write code. The output must end with a pair [w, b], where w and b are numerical values.\n",
        "# \"\"\"\n",
        "\n",
        "# z1_4_lin = \"\"\"You are given a list of points with coordinates below: <<POINTS>>\n",
        "\n",
        "# Below are some previous traces and their lengths. The traces are arranged in descending order based on their lengths, where smaller lengths indicate better solutions. Therefore, although the traces are listed from the largest length to the smallest, the trace with the smallest length is considered the most optimal:\n",
        "# \"\"\"\n",
        "# z2_4_lin = \"\"\"Give me a new trace that is different from all traces above, and has a length lower than any of the above. The trace should traverse all points exactly once. The trace should start with <trace> and end with </trace>.\n",
        "# \"\"\"\n",
        "\n",
        "z1_4_lin = \"\"\"You are a bandit algorithm in a room with 5 buttons labeled blue, green, red, yellow, purple. Each button is associated with a Bernoulli distribution with a fixed but unknown mean; the means for the buttons could be different. For each button, when you press it, you will get a reward that is sampled from the button's associated distribution. You have 100 time steps and, on each time step, you can choose any button and receive the reward. Your goal is to maximize the total reward over the 100 time steps.\"\"\"\n",
        "z2_4_lin = \"\"\"At each time step, I will show you a summary of your past choices and rewards. Then you must make the next choice. You may output a distribution over the 5 buttons formatted EXACTLY like \"blue:a,green:b,red:c,yellow:d,purple:e\".\"\"\""
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "1QvIDf-KS8jQ"
      },
      "outputs": [],
      "source": [
        "def request_gpt4(meta_prompt):\n",
        "    try:\n",
        "        response = openai.ChatCompletion.create(\n",
        "            model=\"gpt-4\",\n",
        "            messages=[\n",
        "                {\"role\":\"system\", \"content\": \"You are an expert whose task is to enhance and then rephrase the raw description to improve them\"},\n",
        "                {\"role\": \"user\", \"content\": meta_prompt}],\n",
        "            temperature=1.3\n",
        "        )\n",
        "        return response.choices[0].message['content'].strip()\n",
        "    except Exception as e:\n",
        "        print(f\"An error occurred: {e}\")\n",
        "        return \"Error\"\n",
        "\n",
        "def request_with_timeout(meta_prompt, timeout=10):\n",
        "    with concurrent.futures.ThreadPoolExecutor() as executor:\n",
        "        future = executor.submit(request_gpt4, meta_prompt)\n",
        "        try:\n",
        "            return future.result(timeout=timeout)\n",
        "        except concurrent.futures.TimeoutError:\n",
        "            print(f\"Request timed out after {timeout} seconds.\")\n",
        "            return \"Timeout Error\"\n",
        "\n",
        "z1_meta_prompt = template_meta_prompt.format(z1_4_lin)\n",
        "z2_meta_prompt = template_meta_prompt.format(z2_4_lin)\n",
        "z1_modified_list = []\n",
        "z2_modified_list = []"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "wceoKnGCTG4b"
      },
      "outputs": [],
      "source": [
        "openai.api_key = \"[API_KEY]\"\n",
        "def contains_required_tags(result):\n",
        "    return True\n",
        "\n",
        "def request_with_check(prompt, timeout, retries=3):\n",
        "    for attempt in range(retries):\n",
        "        result = request_with_timeout(prompt, timeout=timeout)\n",
        "\n",
        "        if result == \"Timeout Error\":\n",
        "            print(\"Retrying due to timeout...\")\n",
        "            continue\n",
        "\n",
        "        if contains_required_tags(result):\n",
        "            return result\n",
        "        else:\n",
        "            print(\"Retrying due to missing required tags...\")\n",
        "\n",
        "        time.sleep(5)\n",
        "\n",
        "    return \"Error: Unable to generate valid response after retries.\"\n",
        "\n",
        "gpt4_outputs = []\n",
        "\n",
        "for i in tqdm(range(100), desc=\"Processing GPT-4 requests\"):\n",
        "    result = request_with_check(z1_meta_prompt, timeout=20)\n",
        "\n",
        "    if result.startswith(\"Error\"):\n",
        "        print(f\"Error occurred: {result}\")\n",
        "        time.sleep(30)\n",
        "    else:\n",
        "        gpt4_outputs.append(result)\n",
        "\n",
        "\n",
        "gpt4_outputs.append(z1_4_lin)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "o_E1VykETQEh"
      },
      "outputs": [],
      "source": [
        "def contains_required_tags(result):\n",
        "    return True\n",
        "\n",
        "def request_with_check(prompt, timeout, retries=3):\n",
        "    for attempt in range(retries):\n",
        "        result = request_with_timeout(prompt, timeout=timeout)\n",
        "\n",
        "        if result == \"Timeout Error\":\n",
        "            print(\"Retrying due to timeout...\")\n",
        "            continue\n",
        "\n",
        "        if contains_required_tags(result):\n",
        "            return result\n",
        "        else:\n",
        "            print(\"Retrying due to missing required tags...\")\n",
        "\n",
        "        time.sleep(5)\n",
        "\n",
        "    return \"Error: Unable to generate valid response after retries.\"\n",
        "\n",
        "gpt4_outputs_2 = []\n",
        "\n",
        "for i in tqdm(range(100), desc=\"Processing GPT-4 requests\"):\n",
        "    result = request_with_check(z2_meta_prompt, timeout=20)\n",
        "\n",
        "    if result.startswith(\"Error\"):\n",
        "        print(f\"Error occurred: {result}\")\n",
        "        time.sleep(30)\n",
        "    else:\n",
        "        gpt4_outputs_2.append(result)\n",
        "\n",
        "gpt4_outputs_2.append(z2_4_lin)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "0CK5ug9kTXUo"
      },
      "outputs": [],
      "source": [
        "def normalize_l2(x):\n",
        "    x = np.array(x)\n",
        "    norm = np.linalg.norm(x)\n",
        "    if norm == 0:\n",
        "        return x\n",
        "    return x / norm\n",
        "\n",
        "\n",
        "def get_embedding(text, model=\"text-embedding-3-large\", target_dim=3072):\n",
        "    openai.api_base = \"https://api.openai.com/v1\"\n",
        "    openai.api_key = \"[API_KEY]\"\n",
        "    while True:\n",
        "        try:\n",
        "            response = openai.Embedding.create(\n",
        "                model=model,\n",
        "                input=text\n",
        "            )\n",
        "            embedding = response['data'][0]['embedding']\n",
        "\n",
        "            reduced_embedding = embedding[:target_dim]\n",
        "\n",
        "            normalized_embedding = normalize_l2(reduced_embedding)\n",
        "\n",
        "            return list(normalized_embedding)\n",
        "        except Exception as e:\n",
        "            print(f\"Error generating embedding: {e}\")\n",
        "            time.sleep(random.uniform(5, 10))\n",
        "\n",
        "embedding_dict_1 = {}\n",
        "embedding_dict_2 = {}\n",
        "\n",
        "for item1 in tqdm(gpt4_outputs, desc=\"Embedding Z1...\", unit=\"item\"):\n",
        "    embedding_dict_1[item1] = get_embedding(item1, model=\"text-embedding-3-large\", target_dim=3072)\n",
        "    time.sleep(random.uniform(0.5, 1.5))\n",
        "\n",
        "for item2 in tqdm(gpt4_outputs_2, desc=\"Embedding Z2...\", unit=\"item\"):\n",
        "    embedding_dict_2[item2] = get_embedding(item2, model=\"text-embedding-3-large\", target_dim=3072)\n",
        "    time.sleep(random.uniform(0.5, 1.5))\n",
        "\n",
        "merged_list = []\n",
        "embedding_list = []\n",
        "z1_list = []\n",
        "z2_list = []\n",
        "\n",
        "for item1 in tqdm(gpt4_outputs, desc=\"Concat Z1 and Z2\", unit=\"item\"):\n",
        "    for item2 in tqdm(gpt4_outputs_2, desc=\"Processing\", unit=\"item\", leave=False):\n",
        "        # merged_content = f\"{item1.strip()}\\n\\n{{}}\\n\\n{item2.strip()}\"\n",
        "        merged_content = f\"{item1.strip()}\\n{item2.strip()}\\n\"\n",
        "\n",
        "        embedding_item1 = embedding_dict_1[item1]\n",
        "        embedding_item2 = embedding_dict_2[item2]\n",
        "\n",
        "        combined_embedding = embedding_item1 + embedding_item2\n",
        "\n",
        "        merged_list.append(merged_content)\n",
        "        embedding_list.append(combined_embedding)\n",
        "        z1_list.append(item1)\n",
        "        z2_list.append(item2)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "43TfW4sQT3wr"
      },
      "outputs": [],
      "source": [
        "df = pd.DataFrame({\n",
        "    'Z1': z1_list,\n",
        "    'Z2': z2_list,\n",
        "    'Merged_Content': merged_list,\n",
        "    'Z1-Z2-Embedding': embedding_list\n",
        "})"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "O9fcinIkTjPb"
      },
      "outputs": [],
      "source": [
        "output_path = 'PATH/DATA_FILE.csv'\n",
        "df.to_csv(output_path, index=False)"
      ]
    }
  ],
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
