{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "959a3e96",
   "metadata": {},
   "outputs": [],
   "source": [
    "from openai import OpenAI\n",
    "import json\n",
    "from tqdm import tqdm\n",
    "import pandas as pd\n",
    "\n",
    "api_key = \"your_key\"\n",
    "\n",
    "client = OpenAI(api_key=api_key)\n",
    "\n",
    "df = pd.read_json('orignial/BFCL_v3_multiple.json')\n",
    "\n",
    "questions = [q[0][0]['content'] for q in df[\"question\"]]\n",
    "\n",
    "def reformulate_strat_3(prompt):\n",
    "\n",
    "    response = client.responses.create(\n",
    "                model=\"gpt-4o-mini\",\n",
    "                input=[\n",
    "                    {\"role\": \"system\", \"content\": \"Reformulate the given prompt in 10 different ways. The intent should remain the same. Provide your answer in a list.\"},\n",
    "                    {\"role\": \"user\", \"content\": prompt}\n",
    "                ],\n",
    "                text = {\n",
    "                \"format\": {\n",
    "                    \"type\": \"json_schema\",\n",
    "                    \"name\": \"prompt_annotation\",  \n",
    "                    \"schema\": {\n",
    "                        \"type\": \"object\",\n",
    "                        \"properties\": {\n",
    "                            \"reformulated_questions\": {\n",
    "                                \"type\": \"array\",\n",
    "                                \"items\": {\n",
    "                                    \"type\": \"string\",\n",
    "                                },\n",
    "                            }\n",
    "                        },\n",
    "                        \"required\": [\"reformulated_questions\"],\n",
    "                        \"additionalProperties\": False\n",
    "                    },\n",
    "                    \"strict\": True\n",
    "                }\n",
    "            })\n",
    "    \n",
    "    return response\n",
    "\n",
    "results = []\n",
    "\n",
    "for i in tqdm(range(0, len(questions))):\n",
    "    q = questions[i]\n",
    "    id = i\n",
    "\n",
    "    reformulation = reformulate_strat_3(q)\n",
    "    \n",
    "    output = json.loads(reformulation.output_text)\n",
    "\n",
    "    results.append({\"sample_id\": id, \"questions\":q, \"reformulated_questions\": output})\n",
    "\n",
    "    df_results = pd.DataFrame(results)\n",
    "    df_results.to_json(\"data_augmentation/strategy_1.json\", orient=\"records\", indent=4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5692865b",
   "metadata": {},
   "outputs": [],
   "source": [
    "from openai import OpenAI\n",
    "import json\n",
    "from tqdm import tqdm\n",
    "import pandas as pd\n",
    "\n",
    "api_key = \"your_key\"\n",
    "\n",
    "client = OpenAI(api_key=api_key)\n",
    "\n",
    "df = pd.read_json('orignial/BFCL_v3_multiple.json')\n",
    "\n",
    "questions = [q[0][0]['content'] for q in df[\"question\"]]\n",
    "\n",
    "def reformulate_strat_2(prompt, function_parameters):\n",
    "\n",
    "    response = client.responses.create(\n",
    "                model=\"gpt-4o-mini\",\n",
    "                input=[\n",
    "                    {\"role\": \"system\", \"content\": f\"Rewrite the given prompt with 10 different formulation request for a function-call. The function called should remain the same, but each prompt should trigger different parameters (different numbers, cities or countries, objects or person if allowed by the function's specification). Provide your 10 different prompts in a list. Here are the parameters of the function: {function_parameters}.\"},\n",
    "                    {\"role\": \"user\", \"content\": prompt}\n",
    "                ],\n",
    "                text = {\n",
    "                \"format\": {\n",
    "                    \"type\": \"json_schema\",\n",
    "                    \"name\": \"prompt_annotation\", \n",
    "                    \"schema\": {\n",
    "                        \"type\": \"object\",\n",
    "                        \"properties\": {\n",
    "                            \"reformulation_different_parameters\": {\n",
    "                                \"type\": \"array\",\n",
    "                                \"items\": {\n",
    "                                    \"type\": \"string\",\n",
    "                                },\n",
    "                            }\n",
    "                        },\n",
    "                        \"required\": [\"reformulation_different_parameters\"],\n",
    "                        \"additionalProperties\": False\n",
    "                    },\n",
    "                    \"strict\": True\n",
    "                }\n",
    "            })\n",
    "    \n",
    "    return response\n",
    "\n",
    "results = []\n",
    "\n",
    "for i in tqdm(range(0, len(questions))):\n",
    "    q = questions[i]\n",
    "    id = i\n",
    "\n",
    "    reformulation = reformulate_strat_2(q)\n",
    "    \n",
    "    output = json.loads(reformulation.output_text)\n",
    "\n",
    "    results.append({\"sample_id\": id, \"questions\":q, \"reformulated_questions\": output})\n",
    "\n",
    "    df_results = pd.DataFrame(results)\n",
    "    df_results.to_json(\"data_augmentation/strategy_2.json\", orient=\"records\", indent=4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "304c6249",
   "metadata": {},
   "outputs": [],
   "source": [
    "from openai import OpenAI\n",
    "import json\n",
    "from tqdm import tqdm\n",
    "import pandas as pd\n",
    "\n",
    "api_key = \"your_key\"\n",
    "\n",
    "client = OpenAI(api_key=api_key)\n",
    "\n",
    "df = pd.read_json('orignial/BFCL_v3_multiple.json')\n",
    "\n",
    "questions = [q[0][0]['content'] for q in df[\"question\"]]\n",
    "\n",
    "def reformulate_strat_3(prompt, ground_truth, new_ground_truth):\n",
    "\n",
    "    response = client.responses.create(\n",
    "                model=\"gpt-4o-mini\",\n",
    "                input=[\n",
    "                    {\"role\": \"system\", \"content\": f\"Rewrite the given prompt with 5 different formulation request for a function-call. The input prompt seeks to trigger this function: {ground_truth}. The prompts that you will generate should now seek to call the following function: {new_ground_truth}.\"},\n",
    "                    {\"role\": \"user\", \"content\": prompt}\n",
    "                ],\n",
    "                text = {\n",
    "                \"format\": {\n",
    "                    \"type\": \"json_schema\",\n",
    "                    \"name\": \"prompt_annotation\",  # Change the name to prompt_annotation\n",
    "                    \"schema\": {\n",
    "                        \"type\": \"object\",\n",
    "                        \"properties\": {\n",
    "                            \"reformulated_questions\": {\n",
    "                                \"type\": \"array\",\n",
    "                                \"items\": {\n",
    "                                    \"type\": \"string\",\n",
    "                                },\n",
    "                            }\n",
    "                        },\n",
    "                        \"required\": [\"reformulated_questions\"],\n",
    "                        \"additionalProperties\": False\n",
    "                    },\n",
    "                    \"strict\": True\n",
    "                }\n",
    "            })\n",
    "    \n",
    "    return response\n",
    "\n",
    "results = []\n",
    "\n",
    "for i in tqdm(range(0, len(questions))):\n",
    "    q = questions[i]\n",
    "    id = i\n",
    "\n",
    "    reformulation = reformulate_strat_3(q)\n",
    "    \n",
    "    output = json.loads(reformulation.output_text)\n",
    "\n",
    "    results.append({\"sample_id\": id, \"questions\":q, \"reformulated_questions\": output})\n",
    "\n",
    "    df_results = pd.DataFrame(results)\n",
    "    df_results.to_json(\"data_augmentation/strategy_3.json\", orient=\"records\", indent=4)"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
