{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "09681346-d2bb-4726-af46-4e431fa8d487",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "import openai\n",
    "import pandas as pd\n",
    "\n",
    "module_path = os.path.abspath( \n",
    "    os.path.join('..')) \n",
    "if module_path not in sys.path:\n",
    "    sys.path.append(module_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a29f319b-6554-4054-8aa4-b824e85a85e2",
   "metadata": {},
   "outputs": [],
   "source": [
    "%env OPENAI_API_KEY=<Enter your key here>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "5af205ab-9fe0-484b-b6f7-2cd49291402f",
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
    "import json\n",
    "\n",
    "from IPython.core.display import HTML\n",
    "from functools import partial\n",
    "\n",
    "from utils import ProgramGenerator, ProgramInterpreter\n",
    "from tasks.extract_data import create_prompt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "84399078-a5e0-4d45-8dfe-ba6a2f038f02",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Prompt builder that considers all task examples (method='all').\n",
    "prompter = partial(\n",
    "    create_prompt,\n",
    "    method='all',\n",
    "    func_pool=False,\n",
    "    example_type=\"comb_tasks\",\n",
    ")\n",
    "generator = ProgramGenerator(prompter=prompter)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "6c286faa-a8fd-4c16-8db4-c808772e350c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Question</th>\n",
       "      <th>Program</th>\n",
       "      <th>Answer</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Perform a trend, seasonality, and neighbourhoo...</td>\n",
       "      <td>[{'order': 1, 'function': 'LOAD_SPATIOTEMPORAL...</td>\n",
       "      <td>Trend Analysis: Slope = 0.00001, P-value = 0.6...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Perform a trend and neighbourhood analysis usi...</td>\n",
       "      <td>[{'order': 1, 'function': 'LOAD_SPATIOTEMPORAL...</td>\n",
       "      <td>Trend Analysis: Slope = -0.00131, P-value = 0....</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Perform a trend and neighbourhood analysis usi...</td>\n",
       "      <td>[{'order': 1, 'function': 'LOAD_SPATIOTEMPORAL...</td>\n",
       "      <td>Trend Analysis: Slope = -0.00068, P-value = 0....</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Perform a seasonality analysis on the historic...</td>\n",
       "      <td>[{'order': 1, 'function': 'LOAD_SPATIOTEMPORAL...</td>\n",
       "      <td>Seasonality Analysis: Daily Strength = 0.41720...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Perform a trend and seasonality analysis on th...</td>\n",
       "      <td>[{'order': 1, 'function': 'LOAD_SPATIOTEMPORAL...</td>\n",
       "      <td>Trend Analysis: Slope = -0.00005, P-value = 0....</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>145</th>\n",
       "      <td>Examine the historical air quality data for th...</td>\n",
       "      <td>[{'order': 1, 'function': 'LOAD_SPATIOTEMPORAL...</td>\n",
       "      <td>Anomaly Values and Timestamps: utc_time\\n2014-...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>146</th>\n",
       "      <td>Examine the historical air quality data for th...</td>\n",
       "      <td>[{'order': 1, 'function': 'LOAD_SPATIOTEMPORAL...</td>\n",
       "      <td>No anomalies detected in air quality data for ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>147</th>\n",
       "      <td>Examine the historical traffic speed data for ...</td>\n",
       "      <td>[{'order': 1, 'function': 'LOAD_SPATIOTEMPORAL...</td>\n",
       "      <td>Anomaly Values and Timestamps: 2012-03-01 00:1...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>148</th>\n",
       "      <td>Examine and identify the anomalies in the hist...</td>\n",
       "      <td>[{'order': 1, 'function': 'LOAD_SPATIOTEMPORAL...</td>\n",
       "      <td>No anomalies detected in air quality data for ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149</th>\n",
       "      <td>Examine the historical air quality data for th...</td>\n",
       "      <td>[{'order': 1, 'function': 'LOAD_SPATIOTEMPORAL...</td>\n",
       "      <td>No anomalies detected in air quality data for ...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>150 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                              Question  \\\n",
       "0    Perform a trend, seasonality, and neighbourhoo...   \n",
       "1    Perform a trend and neighbourhood analysis usi...   \n",
       "2    Perform a trend and neighbourhood analysis usi...   \n",
       "3    Perform a seasonality analysis on the historic...   \n",
       "4    Perform a trend and seasonality analysis on th...   \n",
       "..                                                 ...   \n",
       "145  Examine the historical air quality data for th...   \n",
       "146  Examine the historical air quality data for th...   \n",
       "147  Examine the historical traffic speed data for ...   \n",
       "148  Examine and identify the anomalies in the hist...   \n",
       "149  Examine the historical air quality data for th...   \n",
       "\n",
       "                                               Program  \\\n",
       "0    [{'order': 1, 'function': 'LOAD_SPATIOTEMPORAL...   \n",
       "1    [{'order': 1, 'function': 'LOAD_SPATIOTEMPORAL...   \n",
       "2    [{'order': 1, 'function': 'LOAD_SPATIOTEMPORAL...   \n",
       "3    [{'order': 1, 'function': 'LOAD_SPATIOTEMPORAL...   \n",
       "4    [{'order': 1, 'function': 'LOAD_SPATIOTEMPORAL...   \n",
       "..                                                 ...   \n",
       "145  [{'order': 1, 'function': 'LOAD_SPATIOTEMPORAL...   \n",
       "146  [{'order': 1, 'function': 'LOAD_SPATIOTEMPORAL...   \n",
       "147  [{'order': 1, 'function': 'LOAD_SPATIOTEMPORAL...   \n",
       "148  [{'order': 1, 'function': 'LOAD_SPATIOTEMPORAL...   \n",
       "149  [{'order': 1, 'function': 'LOAD_SPATIOTEMPORAL...   \n",
       "\n",
       "                                                Answer  \n",
       "0    Trend Analysis: Slope = 0.00001, P-value = 0.6...  \n",
       "1    Trend Analysis: Slope = -0.00131, P-value = 0....  \n",
       "2    Trend Analysis: Slope = -0.00068, P-value = 0....  \n",
       "3    Seasonality Analysis: Daily Strength = 0.41720...  \n",
       "4    Trend Analysis: Slope = -0.00005, P-value = 0....  \n",
       "..                                                 ...  \n",
       "145  Anomaly Values and Timestamps: utc_time\\n2014-...  \n",
       "146  No anomalies detected in air quality data for ...  \n",
       "147  Anomaly Values and Timestamps: 2012-03-01 00:1...  \n",
       "148  No anomalies detected in air quality data for ...  \n",
       "149  No anomalies detected in air quality data for ...  \n",
       "\n",
       "[150 rows x 3 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the full dataset; the columns used below are Question, Program and Answer.\n",
    "all_df = pd.read_csv(\"data/STReason_FullDataset.csv\")\n",
    "all_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ceb7dd58-0545-4cee-a0b8-6f10d4814535",
   "metadata": {},
   "outputs": [],
   "source": [
    "def build_prompt(i, query, data_input):\n",
    "    \"\"\"Return the user prompt for dataset row `i`.\n",
    "\n",
    "    The persona is chosen from the row index: rows below 50 use the analysis\n",
    "    persona, rows below 100 the forecasting persona, and all later rows the\n",
    "    anomaly-detection persona. Only the anomaly-detection prompt prefixes\n",
    "    the interpreter output with \"Input Data: \".\n",
    "    \"\"\"\n",
    "    if i < 50:\n",
    "        persona, data_prefix = \"spatio temporal data analysis\", \"\"\n",
    "    elif i < 100:\n",
    "        persona, data_prefix = \"spatiotemporal data forecasting\", \"\"\n",
    "    else:\n",
    "        persona, data_prefix = \"spatiotemporal data anomaly detection\", \"Input Data: \"\n",
    "    return (\n",
    "        f\"You are an expert in {persona}. {query}.\\n\"\n",
    "        f\"{data_prefix}{data_input} \\n\"\n",
    "    )\n",
    "\n",
    "\n",
    "def ask_gpt(prompt, model=\"gpt-3.5-turbo\"):\n",
    "    \"\"\"Send `prompt` as a single user message and return the stripped reply text.\"\"\"\n",
    "    # Call the OpenAI ChatCompletion API\n",
    "    response = openai.ChatCompletion.create(\n",
    "        model=model,  # or gpt-4o-mini or gpt-4\n",
    "        messages=[{\"role\": \"user\", \"content\": prompt}],\n",
    "        temperature=0.7,\n",
    "        max_tokens=4096,\n",
    "        top_p=1,\n",
    "        frequency_penalty=0,\n",
    "        presence_penalty=0,\n",
    "    )\n",
    "    return response['choices'][0]['message']['content'].strip()\n",
    "\n",
    "\n",
    "gen_programs = []\n",
    "queries = []\n",
    "ground_truth_programs = []\n",
    "ground_truth_answers = []\n",
    "gen_ans = []\n",
    "\n",
    "for i in range(len(all_df)):\n",
    "    query = all_df[\"Question\"][i]\n",
    "    ground_truth_program = all_df[\"Program\"][i]\n",
    "    ground_truth_answer = all_df[\"Answer\"][i]\n",
    "    # Reset per row: if generation fails, the error report below must not raise\n",
    "    # NameError (first row) or show the previous row's program (later rows).\n",
    "    gen_prog = None\n",
    "\n",
    "    try:\n",
    "        gen_prog, _ = generator.generate(dict(question=query))\n",
    "\n",
    "        # Run the generated program from an empty initial state.\n",
    "        interpreter = ProgramInterpreter()\n",
    "        init_state = {}\n",
    "        result, prog_state, summary_text = interpreter.execute(gen_prog, init_state, inspect=True)\n",
    "\n",
    "        gen_answer = ask_gpt(build_prompt(i, query, result))\n",
    "\n",
    "        # Append only after every step succeeded so the five lists stay aligned;\n",
    "        # a failed row is reported below and left out of all of them.\n",
    "        gen_programs.append(gen_prog)\n",
    "        gen_ans.append(gen_answer)\n",
    "        queries.append(query)\n",
    "        ground_truth_programs.append(ground_truth_program)\n",
    "        ground_truth_answers.append(ground_truth_answer)\n",
    "\n",
    "    except Exception as e:\n",
    "        print(f\"Error processing query at {i}: {str(e)}\")\n",
    "        print(f\"Query:{query}\")\n",
    "        print(f\"Generated Program:{gen_prog}\")\n",
    "        continue\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "ab9e2847-048b-47d1-b37f-9a3867a653d3",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "150\n",
      "150\n",
      "150\n",
      "150\n",
      "150\n"
     ]
    }
   ],
   "source": [
    "# Sanity check: all five result lists should have the same length.\n",
    "print(len(queries))\n",
    "print(len(ground_truth_programs))\n",
    "print(len(ground_truth_answers))\n",
    "print(len(gen_programs))\n",
    "print(len(gen_ans))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "6f8c9761-004d-457b-b630-a0d1b52e6d98",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Query</th>\n",
       "      <th>Program</th>\n",
       "      <th>Answer</th>\n",
       "      <th>Gen_Program</th>\n",
       "      <th>Gen_Answer</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Perform a trend, seasonality, and neighbourhoo...</td>\n",
       "      <td>[{'order': 1, 'function': 'LOAD_SPATIOTEMPORAL...</td>\n",
       "      <td>Trend Analysis: Slope = 0.00001, P-value = 0.6...</td>\n",
       "      <td>DATA0 = LOAD_SPATIOTEMPORAL_DATA(location=\"402...</td>\n",
       "      <td>To analyze the trend, seasonality, and neighbo...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Perform a trend and neighbourhood analysis usi...</td>\n",
       "      <td>[{'order': 1, 'function': 'LOAD_SPATIOTEMPORAL...</td>\n",
       "      <td>Trend Analysis: Slope = -0.00131, P-value = 0....</td>\n",
       "      <td>DATA0 = LOAD_SPATIOTEMPORAL_DATA(location=\"407...</td>\n",
       "      <td>To perform the trend and neighbourhood analysi...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Perform a trend and neighbourhood analysis usi...</td>\n",
       "      <td>[{'order': 1, 'function': 'LOAD_SPATIOTEMPORAL...</td>\n",
       "      <td>Trend Analysis: Slope = -0.00068, P-value = 0....</td>\n",
       "      <td>DATA0 = LOAD_SPATIOTEMPORAL_DATA(location=\"407...</td>\n",
       "      <td>To perform the trend and neighborhood analysis...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Perform a seasonality analysis on the historic...</td>\n",
       "      <td>[{'order': 1, 'function': 'LOAD_SPATIOTEMPORAL...</td>\n",
       "      <td>Seasonality Analysis: Daily Strength = 0.41720...</td>\n",
       "      <td>DATA0 = LOAD_SPATIOTEMPORAL_DATA(location=\"400...</td>\n",
       "      <td>To perform a seasonality analysis on the histo...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Perform a trend and seasonality analysis on th...</td>\n",
       "      <td>[{'order': 1, 'function': 'LOAD_SPATIOTEMPORAL...</td>\n",
       "      <td>Trend Analysis: Slope = -0.00005, P-value = 0....</td>\n",
       "      <td>DATA0 = LOAD_SPATIOTEMPORAL_DATA(location=\"400...</td>\n",
       "      <td>To perform the trend and seasonality analysis ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>145</th>\n",
       "      <td>Examine the historical air quality data for th...</td>\n",
       "      <td>[{'order': 1, 'function': 'LOAD_SPATIOTEMPORAL...</td>\n",
       "      <td>Anomaly Values and Timestamps: utc_time\\n2014-...</td>\n",
       "      <td>DATA0 = LOAD_SPATIOTEMPORAL_DATA(location=\"401...</td>\n",
       "      <td>Anomalies in the historical air quality data a...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>146</th>\n",
       "      <td>Examine the historical air quality data for th...</td>\n",
       "      <td>[{'order': 1, 'function': 'LOAD_SPATIOTEMPORAL...</td>\n",
       "      <td>No anomalies detected in air quality data for ...</td>\n",
       "      <td>DATA0 = LOAD_SPATIOTEMPORAL_DATA(location=\"400...</td>\n",
       "      <td>Based on the historical air quality data for t...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>147</th>\n",
       "      <td>Examine the historical traffic speed data for ...</td>\n",
       "      <td>[{'order': 1, 'function': 'LOAD_SPATIOTEMPORAL...</td>\n",
       "      <td>Anomaly Values and Timestamps: 2012-03-01 00:1...</td>\n",
       "      <td>DATA0 = LOAD_SPATIOTEMPORAL_DATA(location=\"769...</td>\n",
       "      <td>To detect anomalies in traffic patterns at loc...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>148</th>\n",
       "      <td>Examine and identify the anomalies in the hist...</td>\n",
       "      <td>[{'order': 1, 'function': 'LOAD_SPATIOTEMPORAL...</td>\n",
       "      <td>No anomalies detected in air quality data for ...</td>\n",
       "      <td>DATA0 = LOAD_SPATIOTEMPORAL_DATA(location=\"nan...</td>\n",
       "      <td>Based on the provided data for the historical ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>149</th>\n",
       "      <td>Examine the historical air quality data for th...</td>\n",
       "      <td>[{'order': 1, 'function': 'LOAD_SPATIOTEMPORAL...</td>\n",
       "      <td>No anomalies detected in air quality data for ...</td>\n",
       "      <td>DATA0 = LOAD_SPATIOTEMPORAL_DATA(location=\"401...</td>\n",
       "      <td>Based on the historical air quality data for l...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>150 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                 Query  \\\n",
       "0    Perform a trend, seasonality, and neighbourhoo...   \n",
       "1    Perform a trend and neighbourhood analysis usi...   \n",
       "2    Perform a trend and neighbourhood analysis usi...   \n",
       "3    Perform a seasonality analysis on the historic...   \n",
       "4    Perform a trend and seasonality analysis on th...   \n",
       "..                                                 ...   \n",
       "145  Examine the historical air quality data for th...   \n",
       "146  Examine the historical air quality data for th...   \n",
       "147  Examine the historical traffic speed data for ...   \n",
       "148  Examine and identify the anomalies in the hist...   \n",
       "149  Examine the historical air quality data for th...   \n",
       "\n",
       "                                               Program  \\\n",
       "0    [{'order': 1, 'function': 'LOAD_SPATIOTEMPORAL...   \n",
       "1    [{'order': 1, 'function': 'LOAD_SPATIOTEMPORAL...   \n",
       "2    [{'order': 1, 'function': 'LOAD_SPATIOTEMPORAL...   \n",
       "3    [{'order': 1, 'function': 'LOAD_SPATIOTEMPORAL...   \n",
       "4    [{'order': 1, 'function': 'LOAD_SPATIOTEMPORAL...   \n",
       "..                                                 ...   \n",
       "145  [{'order': 1, 'function': 'LOAD_SPATIOTEMPORAL...   \n",
       "146  [{'order': 1, 'function': 'LOAD_SPATIOTEMPORAL...   \n",
       "147  [{'order': 1, 'function': 'LOAD_SPATIOTEMPORAL...   \n",
       "148  [{'order': 1, 'function': 'LOAD_SPATIOTEMPORAL...   \n",
       "149  [{'order': 1, 'function': 'LOAD_SPATIOTEMPORAL...   \n",
       "\n",
       "                                                Answer  \\\n",
       "0    Trend Analysis: Slope = 0.00001, P-value = 0.6...   \n",
       "1    Trend Analysis: Slope = -0.00131, P-value = 0....   \n",
       "2    Trend Analysis: Slope = -0.00068, P-value = 0....   \n",
       "3    Seasonality Analysis: Daily Strength = 0.41720...   \n",
       "4    Trend Analysis: Slope = -0.00005, P-value = 0....   \n",
       "..                                                 ...   \n",
       "145  Anomaly Values and Timestamps: utc_time\\n2014-...   \n",
       "146  No anomalies detected in air quality data for ...   \n",
       "147  Anomaly Values and Timestamps: 2012-03-01 00:1...   \n",
       "148  No anomalies detected in air quality data for ...   \n",
       "149  No anomalies detected in air quality data for ...   \n",
       "\n",
       "                                           Gen_Program  \\\n",
       "0    DATA0 = LOAD_SPATIOTEMPORAL_DATA(location=\"402...   \n",
       "1    DATA0 = LOAD_SPATIOTEMPORAL_DATA(location=\"407...   \n",
       "2    DATA0 = LOAD_SPATIOTEMPORAL_DATA(location=\"407...   \n",
       "3    DATA0 = LOAD_SPATIOTEMPORAL_DATA(location=\"400...   \n",
       "4    DATA0 = LOAD_SPATIOTEMPORAL_DATA(location=\"400...   \n",
       "..                                                 ...   \n",
       "145  DATA0 = LOAD_SPATIOTEMPORAL_DATA(location=\"401...   \n",
       "146  DATA0 = LOAD_SPATIOTEMPORAL_DATA(location=\"400...   \n",
       "147  DATA0 = LOAD_SPATIOTEMPORAL_DATA(location=\"769...   \n",
       "148  DATA0 = LOAD_SPATIOTEMPORAL_DATA(location=\"nan...   \n",
       "149  DATA0 = LOAD_SPATIOTEMPORAL_DATA(location=\"401...   \n",
       "\n",
       "                                            Gen_Answer  \n",
       "0    To analyze the trend, seasonality, and neighbo...  \n",
       "1    To perform the trend and neighbourhood analysi...  \n",
       "2    To perform the trend and neighborhood analysis...  \n",
       "3    To perform a seasonality analysis on the histo...  \n",
       "4    To perform the trend and seasonality analysis ...  \n",
       "..                                                 ...  \n",
       "145  Anomalies in the historical air quality data a...  \n",
       "146  Based on the historical air quality data for t...  \n",
       "147  To detect anomalies in traffic patterns at loc...  \n",
       "148  Based on the provided data for the historical ...  \n",
       "149  Based on the historical air quality data for l...  \n",
       "\n",
       "[150 rows x 5 columns]"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Put ground truth and generated outputs side by side, one row per query.\n",
    "gpt3_columns = {\n",
    "    'Query': queries,\n",
    "    'Program': ground_truth_programs,\n",
    "    'Answer': ground_truth_answers,\n",
    "    'Gen_Program': gen_programs,\n",
    "    'Gen_Answer': gen_ans,\n",
    "}\n",
    "gpt3_df = pd.DataFrame(gpt3_columns)\n",
    "\n",
    "gpt3_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "fe89463c-d1d9-4ac4-8c5e-9cc9b393d4ff",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the comparison table to CSV without the index column.\n",
    "gpt3_df.to_csv('data/gpt3_answers_new.csv',index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
