{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import json\n",
    "from tqdm import tqdm \n",
    "\n",
    "from dotenv import load_dotenv\n",
    "from openai import OpenAI\n",
    "\n",
    "from math500.math_utils import * \n",
    "from math500.parser import *\n",
    "from math500.grader import * \n",
    "\n",
    "# Load environment variables through python-dotenv before the client reads them.\n",
    "load_dotenv()\n",
    "\n",
    "# Module-wide OpenAI client; os.environ.get yields None if OPENAI_API_KEY is unset.\n",
    "client = OpenAI(api_key=os.environ.get(\"OPENAI_API_KEY\"))\n",
    "\n",
    "def load_data_and_fewshot(args):\n",
    "    \"\"\"Load the evaluation records and few-shot examples for ``args.task``.\n",
    "\n",
    "    Args:\n",
    "        args: Object with a ``task`` attribute; only ``math500`` is handled.\n",
    "\n",
    "    Returns:\n",
    "        ``(dataset, fewshot)``: ``dataset`` is the list of JSON records read from\n",
    "        ``data/math500/test.jsonl`` and ``fewshot`` is whatever\n",
    "        ``load_prompt(num_shots=5)`` returns. Any other task yields ``(None, None)``.\n",
    "    \"\"\"\n",
    "    if args.task != \"math500\":\n",
    "        return None, None\n",
    "\n",
    "    file_path = \"data/math500/test.jsonl\"\n",
    "    with open(file_path, 'r', encoding='utf-8') as f:\n",
    "        # Skip blank lines (e.g. a trailing newline) so json.loads is never fed an empty string.\n",
    "        dataset = [json.loads(line) for line in f if line.strip()]\n",
    "    fewshot = load_prompt(num_shots=5)\n",
    "\n",
    "    return dataset, fewshot\n",
    "\n",
    "def construct_prompt(args, dataset, fewshot):\n",
    "    \"\"\"Build one chat prompt per dataset entry.\n",
    "\n",
    "    Args:\n",
    "        args: Object with a ``task`` attribute; only ``math500`` is handled.\n",
    "        dataset: Sequence of records, each providing a ``problem`` string.\n",
    "        fewshot: Iterable of ``(question, answer)`` pairs prepended to every user\n",
    "            prompt, or ``None`` to build zero-shot prompts.\n",
    "\n",
    "    Returns:\n",
    "        A list of dicts with keys ``idx``, ``prompt`` (system + user messages) and\n",
    "        ``entry`` (the original record); ``None`` for an unsupported task.\n",
    "    \"\"\"\n",
    "    if args.task != \"math500\":\n",
    "        return None\n",
    "\n",
    "    # Plain (non-f) string, so braces are literal: single braces give the intended boxed{} macro.\n",
    "    system_prompt = \"Please reason step by step, and put your final answer within \\\\boxed{}.\\n\"\n",
    "\n",
    "    # The few-shot preamble is the same for every entry, so build it once up front.\n",
    "    fewshot_prefix = \"\"\n",
    "    if fewshot is not None:\n",
    "        fewshot_prefix = \"\\n\\n\".join([f\"{q}\\n\\n{a}\" for q, a in fewshot]) + \"\\n\\n\"\n",
    "\n",
    "    samples = []\n",
    "    # Wrap the dataset itself (not the enumerate object) so tqdm can infer the total.\n",
    "    for idx, entry in enumerate(tqdm(dataset)):\n",
    "        user_prompt = fewshot_prefix + entry['problem'] + \"\\n\"\n",
    "        message = [{\"role\": \"system\", \"content\": system_prompt}, {\"role\": \"user\", \"content\": user_prompt}]\n",
    "        samples.append({\"idx\": idx, \"prompt\": message, \"entry\": entry})\n",
    "\n",
    "    return samples\n",
    "\n",
    "def generate_model_output(model: str, prompt: list, temperature: float = 1.0, n: int = 5) -> list:\n",
    "    \"\"\"Request ``n`` chat completions for one prompt and return their texts.\n",
    "\n",
    "    Args:\n",
    "        model: Model name forwarded to ``client.chat.completions.create``.\n",
    "        prompt: Chat messages (a list of role/content dicts) sent as ``messages``.\n",
    "        temperature: Sampling temperature forwarded to the API call.\n",
    "        n: Number of completions requested in the single API call.\n",
    "\n",
    "    Returns:\n",
    "        A list holding ``message.content`` of every returned choice, in response order.\n",
    "    \"\"\"\n",
    "    response = client.chat.completions.create(\n",
    "        model=model,\n",
    "        messages=prompt,\n",
    "        n=n,\n",
    "        temperature=temperature,\n",
    "    )\n",
    "    return [choice.message.content for choice in response.choices]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def main(config):\n",
    "    \"\"\"Generate model outputs for one configuration and append them to a JSONL file.\n",
    "\n",
    "    Prompts are built for ``config.task``, the first prompt is printed as a preview,\n",
    "    and every sample whose ``idx`` is not already present in the output file is sent\n",
    "    to the model. Because finished ``idx`` values are skipped, calling ``main`` again\n",
    "    with the same configuration continues an interrupted run.\n",
    "\n",
    "    Args:\n",
    "        config: Object exposing ``output_dir``, ``task``, ``model``, ``shot_type``,\n",
    "            ``num_examples`` (-1 keeps every sample), ``temperature`` and ``n``.\n",
    "    \"\"\"\n",
    "    save_path = f\"{config.output_dir}/{config.task}/{config.model}/{config.task}_{config.shot_type}.jsonl\"\n",
    "    os.makedirs(os.path.dirname(save_path), exist_ok=True)\n",
    "\n",
    "    dataset, fewshot = load_data_and_fewshot(config)\n",
    "    if config.shot_type == \"zero\":\n",
    "        fewshot = None\n",
    "\n",
    "    samples = construct_prompt(config, dataset, fewshot)\n",
    "    # construct_prompt returns None for an unsupported task; slicing None would raise TypeError.\n",
    "    if samples and config.num_examples != -1:\n",
    "        samples = samples[:config.num_examples]\n",
    "\n",
    "    if not samples:\n",
    "        print(f\"No samples found for Task: {config.task}, Shot: {config.shot_type}, Temperature: {config.temperature}, N: {config.n}\")\n",
    "        return\n",
    "\n",
    "    # Preview the first prompt so the run settings can be checked by eye.\n",
    "    print(f\"Model: {config.model} Task: {config.task}, Shot: {config.shot_type}, Temperature: {config.temperature}, N: {config.n}\")\n",
    "    print(\"-\" * 50)\n",
    "    for message in samples[0][\"prompt\"]:\n",
    "        print(f\"Role:\\n{message['role']}\")\n",
    "        print(f\"Content:\\n{message['content']}\")\n",
    "        print(\"-\" * 50)\n",
    "\n",
    "    # Collect the idx values already written so they are not requested again.\n",
    "    done_ids = set()\n",
    "    if os.path.exists(save_path):\n",
    "        with open(save_path, 'r', encoding='utf-8') as f:\n",
    "            # Blank lines carry no record; skipping them keeps json.loads from failing.\n",
    "            done_ids = {json.loads(line)['idx'] for line in f if line.strip()}\n",
    "\n",
    "    first_output_shown = False\n",
    "    with open(save_path, \"a\", encoding='utf-8') as f:\n",
    "        for sample in tqdm(samples, total=len(samples)):\n",
    "            if sample['idx'] in done_ids:\n",
    "                continue\n",
    "            try:\n",
    "                model_outputs = generate_model_output(config.model, sample[\"prompt\"], config.temperature, config.n)\n",
    "                sample[\"model_outputs\"] = model_outputs\n",
    "                if not first_output_shown:\n",
    "                    print(model_outputs[0])\n",
    "                    print(\"-\" * 50)\n",
    "                    first_output_shown = True\n",
    "                # Serialize before writing so a failure cannot leave half a record in the file.\n",
    "                f.write(json.dumps(sample) + \"\\n\")\n",
    "                # Flush per record so finished samples survive an interrupted kernel.\n",
    "                f.flush()\n",
    "            except Exception as e:\n",
    "                # Stop at the first failure; rerunning main picks up from the saved idx values.\n",
    "                print(f\"Error processing sample {sample['idx']}: {e}\")\n",
    "                break\n",
    "\n",
    "    print(f\"Results saved to {save_path}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Config:\n",
    "    \"\"\"Run settings read by ``main``.\n",
    "\n",
    "    Every setting can be overridden by keyword; ``Config()`` with no arguments\n",
    "    produces the same attribute values as the previous hard-coded version.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, model=\"gpt-4o\", task=\"drop\", shot_type=\"few\", output_dir=\"result\",\n",
    "                 num_examples=1, temperature=1, n=5, seed=42):\n",
    "        self.model = model  # model name passed to the completion call\n",
    "        # NOTE(review): the loaders visible in this notebook only handle math500,\n",
    "        # so the default task must be overridden before calling main.\n",
    "        self.task = task\n",
    "        self.shot_type = shot_type  # main drops the few-shot examples when this is zero\n",
    "        self.output_dir = output_dir  # root directory of the result JSONL files\n",
    "        self.num_examples = num_examples  # number of leading samples to keep; -1 keeps all\n",
    "        self.temperature = temperature\n",
    "        self.n = n  # completions requested per prompt\n",
    "        self.seed = seed  # NOTE(review): not read by any code visible in this notebook\n",
    "        \n",
    "# Sweep axes: the run cell calls main once per (model, task, shot) combination.\n",
    "tasks = [\"math500\"]\n",
    "shots = [\"few\", \"zero\"]\n",
    "models = [\"gpt-4o-mini\"]\n",
    "\n",
    "# Single settings object; the run cell overwrites model/task/shot_type/n on it.\n",
    "config = Config()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "==================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "500it [00:00, 346236.09it/s]"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Model: gpt-4o-mini Task: math500, Shot: few, Temperature: 1, N: 5\n",
      "--------------------------------------------------\n",
      "Role:\n",
      "system\n",
      "Content:\n",
      "Please reason step by step, and put your final answer within \\boxed{{}}.\n",
      "\n",
      "--------------------------------------------------\n",
      "Role:\n",
      "user\n",
      "Content:\n",
      "Kevin Kangaroo begins hopping on a number line at 0. He wants to get to 1, but he can hop only $\\frac{1}{3}$ of the distance. Each hop tires him out so that he continues to hop $\\frac{1}{3}$ of the remaining distance. How far has he hopped after five hops? Express your answer as a common fraction.\n",
      "\n",
      "Let's think step by step\n",
      "Kevin hops $1/3$ of the remaining distance with every hop.\n",
      "His first hop takes $1/3$ closer.\n",
      "For his second hop, he has $2/3$ left to travel, so he hops forward $(2/3)(1/3)$.\n",
      "For his third hop, he has $(2/3)^2$ left to travel, so he hops forward $(2/3)^2(1/3)$.\n",
      "In general, Kevin hops forward $(2/3)^{k-1}(1/3)$ on his $k$th hop.\n",
      "We want to find how far he has hopped after five hops.\n",
      "This is a finite geometric series with first term $1/3$, common ratio $2/3$, and five terms.\n",
      "Thus, Kevin has hopped $\\frac{\\frac{1}{3}\\left(1-\\left(\\frac{2}{3}\\right)^5\\right)}{1-\\frac{2}{3}} = \\boxed{\\frac{211}{243}}$.\n",
      "The answer is \\frac{211}{243}}\n",
      "\n",
      "What is the area of the region defined by the equation $x^2+y^2 - 7 = 4y-14x+3$?\n",
      "\n",
      "Let's think step by step\n",
      "We rewrite the equation as $x^2 + 14x + y^2 - 4y = 10$ and then complete the square,\n",
      "resulting in  $(x+7)^2-49 + (y-2)^2-4=10$,\n",
      "or $(x+7)^2+(y-2)^2=63$.\n",
      "This is the equation of a circle with center $(-7, 2)$ and radius $\\sqrt{63},$\n",
      "so the area of this region is $\\pi r^2 = \\boxed{63\\pi}$.\n",
      "The answer is 63\\pi\n",
      "\n",
      "If $x^2+y^2=1$, what is the largest possible value of $|x|+|y|$?\n",
      "\n",
      "Let's think step by step\n",
      "If $(x,y)$ lies on the circle,\n",
      "so does $(x,-y),$ $(-x,-y),$ and $(-x,-y),$ (which all give the same value of $|x| + |y|$),\n",
      "so we can assume that $x \\ge 0$ and $y \\ge 0.$\n",
      "Then $|x| + |y| = x + y.$  Squaring, we get\n",
      "\\[(x + y)^2 = x^2 + 2xy + y^2 = 1 + 2xy.\\]\n",
      "Note that $(x - y)^2 \\ge 0.$\n",
      "Expanding, we get $x^2 - 2xy + y^2 \\ge 0,$ so $2xy \\le x^2 + y^2 = 1.$\n",
      "Hence,\\[1 + 2xy \\le 2,\\]which means $x + y \\le \\sqrt{2}.$\n",
      "Equality occurs when $x = y = \\frac{1}{\\sqrt{2}},$\n",
      "so the maximum value of $|x| + |y|$ is $\\boxed{\\sqrt{2}}.$\n",
      "The answer is \\sqrt{2}\n",
      "\n",
      "If $f(x)=\\frac{ax+b}{cx+d}, abcd\\not=0$ and $f(f(x))=x$ for all $x$ in the domain of $f$, what is the value of $a+d$?\n",
      "\n",
      "Let's think step by step\n",
      "The condition $f(f(x))$ means that $f$ is the inverse of itself,\n",
      "so its graph is symmetrical about the line $y = x$.\n",
      "With a rational function of this form, we will have two asymptotes:\n",
      "a vertical one at $x=-d/c$ if $cx+d$ does not divide $ax+b$,\n",
      "and a horizontal one at $y=a/c$,\n",
      "if we take the limit of $f(x)$ as $x$ goes to $\\pm\\infty$.\n",
      "In order for $f$ to be its own inverse, the intersection of the asymptotes must lie on the line $y=x$\n",
      "so that it and its asymptotes reflect onto themselves.\n",
      "This means that $-d/c=a/c$,\n",
      "and therefore $-d=a$ and $a+d=\\boxed{0}$.\n",
      "The answer is 0\n",
      "\n",
      "Expand $(2z^2 + 5z - 6)(3z^3 - 2z + 1)$.\n",
      "\n",
      "Let's think step by step\n",
      "$$\\begin{array}{crrrrrrr}\n",
      "& & & 3z^3 & & -2z & + 1 & \\\\\n",
      "\\times & & & & 2z^2 & +5z & -6 \\\\\n",
      "\\cline{1-7}\\rule{0pt}{0.17in}\n",
      "& & & -18z^3 & & +12z & -6 & \\\\\n",
      "& & +15z^4 & & -10z^2 & +5z & & \\\\\n",
      "+ & 6z^5 & & -4z^3 & +2z^2 & & & \\\\\n",
      "\\cline{1-7}\\rule{0pt}{0.17in}\n",
      "& 6z^5 & +15z^4 & -22z^3 & - 8z^2 &+17z & -6 &\n",
      "\\end{array}$$\n",
      "The answer is 6z^5+15z^4-22z^3-8z^2+17z-6\n",
      "\n",
      "Convert the point $(0,3)$ in rectangular coordinates to polar coordinates.  Enter your answer in the form $(r,\\theta),$ where $r > 0$ and $0 \\le \\theta < 2 \\pi.$\n",
      "\n",
      "--------------------------------------------------\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 1/1 [00:00<00:00, 35544.95it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Results saved to result/math500/gpt-4o-mini/math500_few.jsonl\n",
      "==================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "500it [00:00, 1596006.09it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Model: gpt-4o-mini Task: math500, Shot: zero, Temperature: 1, N: 5\n",
      "--------------------------------------------------\n",
      "Role:\n",
      "system\n",
      "Content:\n",
      "Please reason step by step, and put your final answer within \\boxed{{}}.\n",
      "\n",
      "--------------------------------------------------\n",
      "Role:\n",
      "user\n",
      "Content:\n",
      "Convert the point $(0,3)$ in rectangular coordinates to polar coordinates.  Enter your answer in the form $(r,\\theta),$ where $r > 0$ and $0 \\le \\theta < 2 \\pi.$\n",
      "\n",
      "--------------------------------------------------\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 1/1 [00:00<00:00, 39945.75it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Results saved to result/math500/gpt-4o-mini/math500_zero.jsonl\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "# Run main for every (model, task, shot) combination, reusing the shared config object.\n",
    "for model in models:\n",
    "    for task in tasks:\n",
    "        for shot in shots:\n",
    "            # Point the shared config at the current combination before the run.\n",
    "            config.model, config.task, config.shot_type = model, task, shot\n",
    "            config.n = 5\n",
    "            print(\"=\" * 50)  # separator between consecutive runs\n",
    "            main(config)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "proj2",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
