{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/mnt/petrelfs/songmingyang/anaconda3/envs/smoe/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n",
      "Loading checkpoint shards: 100%|██████████| 17/17 [01:35<00:00,  5.62s/it]\n"
     ]
    }
   ],
   "source": [
    "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
    "\n",
    "model_name = \"/mnt/petrelfs/songmingyang/songmingyang/model/reasoning/policy_models/QwQ-32B-Preview\"\n",
    "\n",
    "model = AutoModelForCausalLM.from_pretrained(\n",
    "    model_name,\n",
    "    torch_dtype=\"auto\",\n",
    "    device_map=\"auto\"\n",
    ")\n",
    "tokenizer = AutoTokenizer.from_pretrained(model_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "## Building Fewshots\n",
    "from mr_eval.utils.utils import *\n",
    "prm_test = \"/mnt/petrelfs/songmingyang/code/reasoning/MR_Hallucination/mr_annotate/build_data/selection_of_data/prm_correct_data/prm_test_p2.jsonl\"\n",
    "prm_test_data = process_jsonl(prm_test)\n",
    "\n",
    "def prepare_model_input(instruction, response, tokenizer):\n",
    "    template = \"Human: {q}\\nAssistant: {r}\"\n",
    "    # if gsm8k\n",
    "    # assistance_response = re.sub(r'\\n#### .*\\n', '\\n', response, flags=re.DOTALL)\n",
    "    assistance_response = response\n",
    "    inputs = template.format(q=instruction, r=assistance_response)\n",
    "    tokenized_inputs = tokenizer(inputs, return_tensors='pt')\n",
    "    return tokenized_inputs\n",
    "def answer_sequence_to_str(answer_sequence):\n",
    "    res = []\n",
    "    for idx,step in enumerate(answer_sequence):\n",
    "        res.append(f\"Step {idx+1}. {step['text']}\\n\\n\")\n",
    "    res_str = \"\".join(res)\n",
    "    return res_str\n",
    "\n",
    "def answer_sequence_to_default_str(answer_sequence,step_tag = 'и'):\n",
    "    res = []\n",
    "    for idx,step in enumerate(answer_sequence):\n",
    "        res.append(f\"Step {idx+1}: {step['text']} {step_tag}\\n\")\n",
    "    res_str = \"\".join(res)\n",
    "    return res_str\n",
    "    \n",
    "def answer_sequence_to_shepherd_str(answer_sequence,step_tag = 'ки'):\n",
    "    res = []\n",
    "    for idx,step in enumerate(answer_sequence):\n",
    "        res.append(f\"Step {idx+1}: {step['text']} {step_tag}\\n\")\n",
    "    res_str = \"\".join(res)\n",
    "    return res_str\n",
    "\n",
    "def answer_sequence_to_reasoneval_list(answer_sequence):\n",
    "    res = []\n",
    "    for idx,step in enumerate(answer_sequence):\n",
    "        res.append(f\"{idx+1}. {step['text']}\")\n",
    "    return res\n",
    "    \n",
    "\n",
    "def get_best_answer_by_item(item,return_type=\"shepherd\"):\n",
    "    steps = prm_item[\"label\"][\"steps\"]\n",
    "    best_answers = []\n",
    "    for step in steps:\n",
    "        if step[\"human_completion\"] is not None and step[\"chosen_completion\"] is None:\n",
    "            best_answers.append(step[\"human_completion\"])\n",
    "        elif step[\"chosen_completion\"] is not None:\n",
    "            best_answers.append(step[\"completions\"][step[\"chosen_completion\"]])\n",
    "        else:\n",
    "            print(f\"skipped one step\")\n",
    "    if return_type == \"shepherd\":\n",
    "        answer_str = answer_sequence_to_shepherd_str(best_answers)\n",
    "    elif return_type == \"str\":\n",
    "        answer_str = answer_sequence_to_str(best_answers)\n",
    "    elif return_type == \"reasoneval\":\n",
    "        answer_str = answer_sequence_to_reasoneval_list(best_answers)\n",
    "    elif return_type == \"default\":\n",
    "        answer_str = answer_sequence_to_default_str(best_answers)\n",
    "    else:\n",
    "        answer_str =  best_answers\n",
    "    return answer_str\n",
    "\n",
    "def get_latex_str(question,answer):\n",
    "    res = f\"Question:\\n\\n{question}\\n\\nAnswer:\\n\\n{answer}\"\n",
    "    return res\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 1. To solve an equation involving absolute value, I need to consider two cases: one where the expression inside the absolute value is positive, and one where it is negative.\n",
      "\n",
      "Step 2. For the first case, I assume that both $x+5$ and $3x-6$ are positive, so I can drop the absolute value signs and get $x+5=3x-6$.\n",
      "\n",
      "Step 3. Solving for $x$ in this case, I subtract $x$ from both sides and add $6$ to both sides, and get $11=2x$.\n",
      "\n",
      "Step 4. Dividing both sides by $2$, I get $x=\\frac{11}{2}$.\n",
      "\n",
      "Step 5. For the second case, I assume that both $x+5$ and $3x-6$ are negative, so I can change the signs of both expressions and get $-x-5=-3x+6$.\n",
      "\n",
      "Step 6. Solving for $x$ in this case, I add $3x$ to both sides and subtract $6$ from both sides, and get $-11=2x$.\n",
      "\n",
      "Step 7. Dividing both sides by $2$, I get $x=-\\frac{11}{2}$.\n",
      "\n",
      "Step 8. Now I have two possible values for $x$, but the problem asks for the largest one, so I compare them and see that $\\frac{11}{2}$ is larger than $-\\frac{11}{2}$.\n",
      "\n",
      "Step 9. Therefore, the largest possible value of $x$ that satisfies the equation is $\\frac{11}{2}$.\n",
      "\n",
      "Step 10. # Answer\n",
      "\n",
      "\\frac{11}{2}\n",
      "\n",
      "\n"
     ]
    }
   ],
   "source": [
    "idx = 12\n",
    "prm_item = prm_test_data[idx]\n",
    "\n",
    "question = prm_item['question'][\"problem\"]\n",
    "ground_truth = prm_item['question'][\"ground_truth_answer\"]\n",
    "\n",
    "steps = prm_item[\"label\"][\"steps\"]\n",
    "best_answer = get_best_answer_by_item(prm_item,return_type=\"reasoneval\")\n",
    "best_steps = get_best_answer_by_item(prm_item,return_type=\"step\")\n",
    "best_latex = get_best_answer_by_item(prm_item,return_type=\"str\")\n",
    "latex_str = get_latex_str(question,best_latex)\n",
    "best_default = get_best_answer_by_item(prm_item,return_type=\"default\")\n",
    "print(best_latex)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'If $|x+5|-|3x-6|=0$, find the largest possible value of $x$. Express your answer as an improper fraction.'"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "question"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "fewshot_q1 =\"Compute $\\arcsin \\left( -\\frac{1}{2} \\right).$  Express your answer in radians.\"\n",
    "fewshot_a1=\"\"\"\n",
    "Step 1. I know that the arcsine function is the inverse of the sine function, so I want to find an angle $\\theta$ such that $\\sin(\\theta) = -\\frac{1}{2}.$\n",
    "\n",
    "Step 2. I also know that the range of the arcsine function is $[-\\frac{\\pi}{2}, \\frac{\\pi}{2}]$, so I only need to consider angles in the fourth and first quadrants, where sine is negative and positive respectively.\n",
    "\n",
    "Step 3. I recall that the sine function is periodic with a period of $2\\pi$, so any angle that satisfies $\\sin(\\theta) = -\\frac{1}{2}$ must be of the form $\\theta = -\\frac{\\pi}{6} + 2k\\pi$ or $\\theta = \\frac{7\\pi}{6} + 2k\\pi$, where $k$ is an integer.\n",
    "\n",
    "Step 4. However, since I want $\\theta$ to be in the range of the arcsine function, I need to choose $k$ such that $-\\frac{\\pi}{2} \\leq \\theta \\leq \\frac{\\pi}{2}.$\n",
    "\n",
    "Step 5. This means that $k$ can only be 0 or -1, and the only possible values of $\\theta$ are $-\\frac{\\pi}{6}$ or $\\frac{7\\pi}{6}.$\n",
    "\n",
    "Step 6. To decide which one is the correct answer, I can use the fact that the arcsine function is an odd function, meaning that $\\arcsin(-x) = -\\arcsin(x)$ for any $x$ in the domain.\n",
    "\n",
    "Step 7. Therefore, since I have $\\arcsin \\left( -\\frac{1}{2} \\right)$, I need to take the negative of the angle that gives $\\sin(\\theta) = \\frac{1}{2}$, which is $\\frac{\\pi}{6}.$\n",
    "\n",
    "Step 8. So, the final answer is $\\theta = -\\frac{\\pi}{6}.$\n",
    "\n",
    "# Answer\n",
    "\n",
    "-\\frac{\\pi}{6}\n",
    "\"\"\"\n",
    "\n",
    "fewshot_q2=\"If $|x+5|-|3x-6|=0$, find the largest possible value of $x$. Express your answer as an improper fraction.\"\n",
    "\n",
    "fewshot_a2=\"\"\"\n",
    "Step 1. To solve an equation involving absolute value, I need to consider two cases: one where the expression inside the absolute value is positive, and one where it is negative.\n",
    "\n",
    "Step 2. For the first case, I assume that both $x+5$ and $3x-6$ are positive, so I can drop the absolute value signs and get $x+5=3x-6$.\n",
    "\n",
    "Step 3. Solving for $x$ in this case, I subtract $x$ from both sides and add $6$ to both sides, and get $11=2x$.\n",
    "\n",
    "Step 4. Dividing both sides by $2$, I get $x=\\frac{11}{2}$.\n",
    "\n",
    "Step 5. For the second case, I assume that both $x+5$ and $3x-6$ are negative, so I can change the signs of both expressions and get $-x-5=-3x+6$.\n",
    "\n",
    "Step 6. Solving for $x$ in this case, I add $3x$ to both sides and subtract $6$ from both sides, and get $-11=2x$.\n",
    "\n",
    "Step 7. Dividing both sides by $2$, I get $x=-\\frac{11}{2}$.\n",
    "\n",
    "Step 8. Now I have two possible values for $x$, but the problem asks for the largest one, so I compare them and see that $\\frac{11}{2}$ is larger than $-\\frac{11}{2}$.\n",
    "\n",
    "Step 9. Therefore, the largest possible value of $x$ that satisfies the equation is $\\frac{11}{2}$.\n",
    "\n",
    "# Answer\n",
    "\n",
    "\\frac{11}{2}\n",
    "\"\"\"\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 1. So I have this problem here: three pencils and a jumbo eraser cost $1.24, and five pencils and a jumbo eraser cost $1.82. I need to find the cost of a pencil in cents.\n",
      "\n",
      "Step 2. First, I should probably convert the dollars to cents to make it easier since the question asks for the answer in cents. So, $1.24 is 124 cents, and $1.82 is 182 cents.\n",
      "\n",
      "Step 3. Now, I need to set up equations based on the information given. Let's say the cost of one pencil is P cents, and the cost of one jumbo eraser is E cents.\n",
      "\n",
      "Step 4. From the first statement, \"three pencils and a jumbo eraser cost 124 cents,\" I can write the equation: 3P + E = 124.\n",
      "\n",
      "Step 5. From the second statement, \"five pencils and a jumbo eraser cost 182 cents,\" I can write: 5P + E = 182.\n",
      "\n",
      "Step 6. So now I have a system of two equations:\n",
      "\n",
      "Equation 1: 3P + E = 124\n",
      "\n",
      "Equation 2: 5P + E = 182\n",
      "\n",
      "Step 7. I need to solve for P, the cost of a pencil. To do this, I can eliminate E by subtracting Equation 1 from Equation 2.\n",
      "\n",
      "Step 8. Subtracting Equation 1 from Equation 2: (5P + E) - (3P + E) = 182 - 124.\n",
      "\n",
      "Step 9. Simplifying that: 5P + E - 3P - E = 58.\n",
      "\n",
      "Step 10. Which further simplifies to: 2P = 58.\n",
      "\n",
      "Step 11. Therefore, P = 58 / 2 = 29 cents.\n",
      "\n",
      "Step 12. So, the cost of a pencil is 29 cents.\n",
      "\n",
      "Step 13. But to make sure, I should check if this makes sense with both original equations.\n",
      "\n",
      "Step 14. Plugging P = 29 into Equation 1: 3*(29) + E = 124 → 87 + E = 124 → E = 124 - 87 = 37 cents.\n",
      "\n",
      "Step 15. Now, check with Equation 2: 5*(29) + 37 = 145 + 37 = 182 cents, which matches the second statement.\n",
      "\n",
      "Step 16. Everything checks out, so the cost of a pencil is indeed 29 cents.\n",
      "\n",
      "# Final Answer\n",
      "\n",
      "\\[ \\boxed{29} \\]\n"
     ]
    }
   ],
   "source": [
    "prompt = \"Three pencils and a jumbo eraser cost $\\\\$1.24$. Five pencils and a jumbo eraser cost $\\\\$1.82$. No prices include tax. In cents, what is the cost of a pencil?\"\n",
    "messages = [\n",
    "    {\"role\": \"system\", \"content\": \"You are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step. And return answers as the following format: Step 1. xxx \\n Step 2. xxx \\n ...... Step n. xxx \\n \"},\n",
    "    {\"role\": \"user\", \"content\": fewshot_q1},\n",
    "    {\"role\": \"assistant\", \"content\": fewshot_a1},\n",
    "    {\"role\": \"user\", \"content\": fewshot_q2},\n",
    "    {\"role\": \"assistant\", \"content\": fewshot_a2},\n",
    "    {\"role\": \"user\", \"content\": prompt},\n",
    "]\n",
    "text = tokenizer.apply_chat_template(\n",
    "    messages,\n",
    "    tokenize=False,\n",
    "    add_generation_prompt=True\n",
    ")\n",
    "model_inputs = tokenizer([text], return_tensors=\"pt\").to(model.device)\n",
    "\n",
    "generated_ids = model.generate(\n",
    "    **model_inputs,\n",
    "    max_new_tokens=2048\n",
    ")\n",
    "generated_ids = [\n",
    "    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)\n",
    "]\n",
    "\n",
    "response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]\n",
    "print(response)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "12"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import re\n",
    "\n",
    "def extract_steps(text):\n",
    "    \"\"\"\n",
    "    从文本中提取每个 Step 的内容，并按顺序返回一个列表。\n",
    "    \"\"\"\n",
    "    # 正则表达式：匹配 \"Step X.\" 开头，捕获其后的内容\n",
    "    pattern = r\"(Step \\d+\\..*?)(?=Step \\d+\\.|\\Z)\"  # 匹配 Step 开头到下一个 Step 或文本结束\n",
    "    steps = re.findall(pattern, text, re.DOTALL)  # 使用 re.DOTALL 允许匹配换行符\n",
    "    return steps\n",
    "\n",
    "# 输入文本\n",
    "text = \"\"\"\n",
    "Step 1. Let's define variables for the prices of the items. Let's say the cost of one pencil is p cents and the cost of one jumbo eraser is e cents.\n",
    "\n",
    "Step 2. Now, we can translate the given information into equations. The first statement says that three pencils and one jumbo eraser cost $1.24. Since 1 dollar is 100 cents, $1.24 is 124 cents. So, the equation is:\n",
    "\n",
    "3p + e = 124\n",
    "\n",
    "Step 3. The second statement says that five pencils and one jumbo eraser cost $1.82, which is 182 cents. So, the equation is:\n",
    "\n",
    "5p + e = 182\n",
    "\n",
    "Step 4. Now, we have a system of two equations with two variables:\n",
    "\n",
    "3p + e = 124\n",
    "\n",
    "5p + e = 182\n",
    "\n",
    "Step 5. To solve for p and e, we can use the elimination method. Let's subtract the first equation from the second equation to eliminate e:\n",
    "\n",
    "(5p + e) - (3p + e) = 182 - 124\n",
    "\n",
    "Step 6. Simplifying this, we get:\n",
    "\n",
    "5p + e - 3p - e = 58\n",
    "\n",
    "Which reduces to:\n",
    "\n",
    "2p = 58\n",
    "\n",
    "Step 7. Dividing both sides by 2:\n",
    "\n",
    "p = 29\n",
    "\n",
    "Step 8. Now that we have the value of p, which is 29 cents, we can substitute it back into one of the original equations to find e. Let's use the first equation:\n",
    "\n",
    "3(29) + e = 124\n",
    "\n",
    "Step 9. Calculating 3 times 29:\n",
    "\n",
    "87 + e = 124\n",
    "\n",
    "Step 10. Subtracting 87 from both sides:\n",
    "\n",
    "e = 124 - 87\n",
    "\n",
    "e = 37\n",
    "\n",
    "Step 11. So, the cost of one pencil is 29 cents and the cost of one jumbo eraser is 37 cents.\n",
    "\n",
    "Step 12. But the question is asking specifically for the cost of a pencil in cents, which is 29.\n",
    "\n",
    "# Answer\n",
    "\n",
    "\\[ \\boxed{29} \\]\n",
    "\"\"\"\n",
    "\n",
    "# 提取步骤\n",
    "steps = extract_steps(text)\n",
    "\n",
    "# 打印每一步内容\n",
    "len(steps)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "## Test batch inference\n",
    "from copy import deepcopy\n",
    "prompt = \"Three pencils and a jumbo eraser cost $\\\\$1.24$. Five pencils and a jumbo eraser cost $\\\\$1.82$. No prices include tax. In cents, what is the cost of a pencil?\"\n",
    "messages = [\n",
    "    {\"role\": \"system\", \"content\": \"You are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step. And return answers as the following format: Step 1. xxx \\n Step 2. xxx \\n ...... Step n. xxx \\n \"},\n",
    "    {\"role\": \"user\", \"content\": fewshot_q1},\n",
    "    {\"role\": \"assistant\", \"content\": fewshot_a1},\n",
    "    {\"role\": \"user\", \"content\": fewshot_q2},\n",
    "    {\"role\": \"assistant\", \"content\": fewshot_a2},\n",
    "    {\"role\": \"user\", \"content\": prompt},\n",
    "]\n",
    "messages2 = [\n",
    "    {\"role\": \"system\", \"content\": \"You are a helpful and harmless assistant. You are Qwen developed by Alibaba. \"},\n",
    "    {\"role\": \"user\", \"content\": fewshot_q1},\n",
    "    {\"role\": \"assistant\", \"content\": fewshot_a1},\n",
    "    {\"role\": \"user\", \"content\": fewshot_q2},\n",
    "    {\"role\": \"assistant\", \"content\": fewshot_a2},\n",
    "    {\"role\": \"user\", \"content\": \"If $|x+5|-|3x-6|=0$, find the largest possible value of $x$. Express your answer as an improper fraction.\"},\n",
    "]\n",
    "\n",
    "\n",
    "text = tokenizer.apply_chat_template(\n",
    "    messages,\n",
    "    tokenize=False,\n",
    "    add_generation_prompt=True\n",
    ")\n",
    "text2 = tokenizer.apply_chat_template(\n",
    "    messages2,\n",
    "    tokenize=False,\n",
    "    add_generation_prompt=True\n",
    ")\n",
    "\n",
    "model_inputs = tokenizer([text,text2], return_tensors=\"pt\",padding=True).to(model.device)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "torch.Size([1, 1027])"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "text = tokenizer.apply_chat_template(\n",
    "    messages,\n",
    "    tokenize=True,\n",
    "    add_generation_prompt=True,\n",
    "    return_tensors=\"pt\"\n",
    ")\n",
    "text.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 1. So I have this problem here: three pencils and a jumbo eraser cost $1.24, and five pencils and a jumbo eraser cost $1.82. I need to find the cost of a pencil in cents.\n",
      "\n",
      "Step 2. First, I should probably set up some equations based on the information given. Let's denote the cost of one pencil as p cents and the cost of one jumbo eraser as e cents.\n",
      "\n",
      "Step 3. Since the prices are given in dollars, I need to convert them to cents to make calculations easier. So, $1.24 is 124 cents, and $1.82 is 182 cents.\n",
      "\n",
      "Step 4. Now, translating the statements into equations:\n",
      "\n",
      "- Three pencils and one eraser cost 124 cents: 3p + e = 124\n",
      "\n",
      "- Five pencils and one eraser cost 182 cents: 5p + e = 182\n",
      "\n",
      "Step 5. I have a system of two equations with two variables:\n",
      "\n",
      "1) 3p + e = 124\n",
      "\n",
      "2) 5p + e = 182\n",
      "\n",
      "Step 6. To solve for p, I can subtract equation 1 from equation 2 to eliminate e:\n",
      "\n",
      "(5p + e) - (3p + e) = 182 - 124\n",
      "\n",
      "Step 7. Simplifying that:\n",
      "\n",
      "5p + e - 3p - e = 58\n",
      "\n",
      "Which reduces to:\n",
      "\n",
      "2p = 58\n",
      "\n",
      "Step 8. So, p = 58 / 2 = 29 cents.\n",
      "\n",
      "Step 9. Wait, but I should check if this makes sense. If p is 29 cents, then let's find e from one of the equations.\n",
      "\n",
      "Using equation 1: 3p + e = 124\n",
      "\n",
      "3*(29) + e = 124\n",
      "\n",
      "87 + e = 124\n",
      "\n",
      "So, e = 124 - 87 = 37 cents.\n",
      "\n",
      "Step 10. Let me verify this with equation 2: 5p + e = 182\n",
      "\n",
      "5*(29) + 37 = 145 + 37 = 182 cents. Perfect, it matches.\n",
      "\n",
      "Step 11. So, the cost of a pencil is 29 cents.\n",
      "\n",
      "But wait, the problem says, \"in cents,\" so I should make sure that's the final answer.\n",
      "\n",
      "Step 12. Alternatively, perhaps there's another way to approach this, like using消元法 or substitution.\n",
      "\n",
      "Step 13. Let me try substitution method just to confirm.\n",
      "\n",
      "From equation 1: e = 124 - 3p\n",
      "\n",
      "Plugging into equation 2: 5p + (124 - 3p) = 182\n",
      "\n",
      "Simplify: 5p + 124 - 3p = 182\n",
      "\n",
      "Which is: 2p + 124 = 182\n",
      "\n",
      "Then: 2p = 182 - 124 = 58\n",
      "\n",
      "So again, p = 29 cents.\n",
      "\n",
      "Step 14. Seems consistent. Maybe I can also consider if there are any other variables or constraints I'm missing, but the problem seems straightforward.\n",
      "\n",
      "Step 15. Another way to look at it is to consider the difference in cost between the two scenarios.\n",
      "\n",
      "Step 16. The difference in pencils between the two scenarios is 5 - 3 = 2 pencils.\n",
      "\n",
      "Step 17. The difference in cost is $1.82 - $1.24 = $0.58.\n",
      "\n",
      "Step 18. So, 2 pencils cost $0.58, meaning one pencil costs $0.29, which is 29 cents.\n",
      "\n",
      "Step 19. This matches my earlier result, giving me more confidence in the answer.\n",
      "\n",
      "Step 20. Therefore, the cost of a pencil is 29 cents.\n",
      "\n",
      "# Final Answer\n",
      "\n",
      "\\[ \\boxed{29} \\]\n"
     ]
    }
   ],
   "source": [
    "generated_ids = model.generate(\n",
    "    **model_inputs,\n",
    "    max_new_tokens=2048\n",
    ")\n",
    "generated_ids = [\n",
    "    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)\n",
    "]\n",
    "\n",
    "response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]\n",
    "print(response)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(generated_ids)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    },
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Step 1. I have two equations based on the given information:\n",
      "\n",
      "3 pencils + 1 eraser = $1.24\n",
      "\n",
      "5 pencils + 1 eraser = $1.82\n",
      "\n",
      "Step 2. I need to find the cost of one pencil in cents. So, I should solve for the price of a pencil.\n",
      "\n",
      "Step 3. Let's denote the price of one pencil as p (in dollars) and the price of one eraser as e (in dollars).\n",
      "\n",
      "So, the equations become:\n",
      "\n",
      "3p + e = 1.24 ...(1)\n",
      "\n",
      "5p + e = 1.82 ...(2)\n",
      "\n",
      "Step 4. I can subtract equation (1) from equation (2) to eliminate e:\n",
      "\n",
      "(5p + e) - (3p + e) = 1.82 - 1.24\n",
      "\n",
      "Which simplifies to:\n",
      "\n",
      "2p = 0.58\n",
      "\n",
      "Step 5. Solving for p:\n",
      "\n",
      "p = 0.58 / 2\n",
      "\n",
      "p = 0.29 dollars\n",
      "\n",
      "Step 6. Since the question asks for the cost in cents, I need to convert dollars to cents. There are 100 cents in a dollar.\n",
      "\n",
      "So, p = 0.29 * 100 = 29 cents\n",
      "\n",
      "Step 7. To verify, I can plug the value of p back into one of the original equations to find e.\n",
      "\n",
      "Using equation (1):\n",
      "\n",
      "3*(0.29) + e = 1.24\n",
      "\n",
      "0.87 + e = 1.24\n",
      "\n",
      "e = 1.24 - 0.87\n",
      "\n",
      "e = 0.37 dollars or 37 cents\n",
      "\n",
      "Step 8. Check with equation (2):\n",
      "\n",
      "5*(0.29) + 0.37 = 1.45 + 0.37 = 1.82, which matches the given total.\n",
      "\n",
      "Step 9. Therefore, the cost of a pencil is 29 cents.\n",
      "\n",
      "# Final Answer\n",
      "\n",
      "\\[ \\boxed{29} \\]\n"
     ]
    }
   ],
   "source": [
    "response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[1]\n",
    "print(response)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "smoe",
   "language": "python",
   "name": "smoe"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
