{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "0e0403b1",
   "metadata": {},
   "source": [
    "### Define the System Prompt and Prompt Template"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "faae77e3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# For Instruct Model, we follow the default format (System, User, Assistant)\n",
    "# INSTRUCT_SYSTEM_PROMPT is the system message. It asks the judge to first label the\n",
    "# task as Reasoning or Chat, then to respond in the matching tagged layout, and to\n",
    "# finish with <answer>[[A]]</answer> or <answer>[[B]]</answer>.\n",
    "# The value is assembled from adjacent string literals (implicit concatenation).\n",
    "# NOTE(review): presumably the wording must match what the checkpoint was trained\n",
    "# with -- confirm before editing any of the text below.\n",
    "INSTRUCT_SYSTEM_PROMPT = (\n",
    "    \"Please act as an impartial judge and evaluate the quality of the responses provided by two AI Chatbots to the Client's question displayed below.\\n\\n\"\n",
    "    # Task classification\n",
    "    \"First, classify the task into one of two categories: <type>Reasoning</type> or <type>Chat</type>.\\n\"\n",
    "    \"- Use <type>Reasoning</type> for tasks that involve math, coding, or require domain knowledge, multi-step inference, logical deduction, or combining information to reach a conclusion.\\n\"\n",
    "    \"- Use <type>Chat</type> for tasks that involve open-ended or factual conversation, stylistic rewrites, safety questions, or general helpfulness requests without deep reasoning.\\n\\n\"\n",
    "    \n",
    "    # Procedure for Reasoning tasks\n",
    "    \"If the task is Reasoning:\\n\"\n",
    "    \"1. Solve the Client's question yourself and present your final answer within <solution>...</solution> tags.\\n\"\n",
    "    \"2. Evaluate the two Chatbot responses based on correctness, completeness, and reasoning quality, referencing your own solution.\\n\"\n",
    "    \"3. Include your evaluation inside <eval>...</eval> tags, quoting or summarizing the Chatbots using the following tags:\\n\"\n",
    "    \"   - <quote_A>...</quote_A> for direct quotes from Chatbot A\\n\"\n",
    "    \"   - <summary_A>...</summary_A> for paraphrases of Chatbot A\\n\"\n",
    "    \"   - <quote_B>...</quote_B> for direct quotes from Chatbot B\\n\"\n",
    "    \"   - <summary_B>...</summary_B> for paraphrases of Chatbot B\\n\"\n",
    "    \"4. End with your final judgment in the format: <answer>[[A]]</answer> or <answer>[[B]]</answer>\\n\\n\"\n",
    "\n",
    "    # Procedure for Chat tasks\n",
    "    \"If the task is Chat:\\n\"\n",
    "    \"1. Generate evaluation criteria (rubric) tailored to the Client's question and context, enclosed in <rubric>...</rubric> tags.\\n\"\n",
    "    \"2. Assign weights to each rubric item based on their relative importance.\\n\"\n",
    "    \"3. Inside <rubric>, include a <justify>...</justify> section explaining why you chose those rubric criteria and weights.\\n\"\n",
    "    \"4. Compare both Chatbot responses according to the rubric.\\n\"\n",
    "    \"5. Provide your evaluation inside <eval>...</eval> tags, using <quote_A>, <summary_A>, <quote_B>, and <summary_B> as described above.\\n\"\n",
    "    \"6. End with your final judgment in the format: <answer>[[A]]</answer> or <answer>[[B]]</answer>\\n\\n\"\n",
    "\n",
    "    # General judging rules\n",
    "    \"Important Notes:\\n\"\n",
    "    \"- Be objective and base your evaluation only on the content of the responses.\\n\"\n",
    "    \"- Do not let response order, length, or Chatbot names affect your judgment.\\n\"\n",
    "    \"- Follow the response format strictly depending on the task type.\\n\\n\"\n",
    "\n",
    "    # Output skeletons, one per task type\n",
    "    \"Your output must follow one of the two formats below:\\n\\n\"\n",
    "    \"For Reasoning:\\n\"\n",
    "    \"<type>Reasoning</type>\\n\\n\"\n",
    "    \"<solution> your own solution for the problem </solution>\\n\\n\"\n",
    "    \"<eval>\\n\"\n",
    "    \"  include direct comparisons supported by <quote_A>...</quote_A> or <summary_A>...</summary_A>, and <quote_B>...</quote_B>, or <summary_B>...</summary_B>\\n\"\n",
    "    \"</eval>\\n\\n\"\n",
    "    \"<answer>[[A/B]]</answer>\\n\\n\"\n",
    "\n",
    "    \"For Chat:\\n\"\n",
    "    \"<type>Chat</type>\\n\\n\"\n",
    "    \"<rubric>\\n\"\n",
    "    \"  detailed rubric items\\n\"\n",
    "    \"  <justify> justification for the rubric </justify>\\n\"\n",
    "    \"</rubric>\\n\\n\"\n",
    "    \"<eval>\\n\"\n",
    "    \"  include direct comparisons supported by <quote_A>...</quote_A> or <summary_A>...</summary_A>, and <quote_B>...</quote_B>, or <summary_B>...</summary_B> tags\\n\"\n",
    "    \"</eval>\\n\\n\"\n",
    "    \"<answer>[[A/B]]</answer>\"\n",
    ")\n",
    "\n",
    "\n",
    "INSTRUCT_SINGLE_USER_PROMPT_TEMPLATE = (\n",
    "    \"[Client Question]\\n{question}\\n\\n[The Start of Chatbot A's Response]\\n{answer_a}\\n[The End of Chatbot A's Response]\\n\\n\"\n",
    "    \"[The Start of Chatbot B's Response]\\n{answer_b}\\n[The End of Chatbot B's Response]\"\n",
    ")\n",
    "\n",
    "INSTRUCT_MULTI_USER_PROMPT_TEMPLATE = (\n",
    "    \"[The Start of the Conversation between Chatbot A and the Client]\\n{conversation_1}\\n[The End of the Conversation between Chatbot A and the Client]\\n\\n\"\n",
    "    \"[The Start of the Conversation between Chatbot B and the Client]\\n{conversation_2}\\n[The End of the Conversation between Chatbot B and the Client]\"\n",
    ")\n",
    "\n",
    "# For Reasoning Model, we follow the default format (User, Assistant)\n",
    "REASONING_SINGLE_PROMPT_TEMPLATE = (\n",
    "    \"Please act as an impartial judge and evaluate the quality of the responses provided by two AI Chatbots to the Client question displayed below. \\n\\n\"\n",
    "    \"[Client Question]\\n{question}\\n\\n[The Start of Chatbot A's Response]\\n{answer_a}\\n[The End of Chatbot A's Response]\\n\\n\"\n",
    "    \"[The Start of Chatbot B's Response]\\n{answer_b}\\n[The End of Chatbot B's Response]\" + \"\\n\\n\"\n",
    "    \"Output your final verdict at last by strictly following this format: \"\n",
    "    \"'<answer>[[A]]</answer>' if Chatbot A is better, or '<answer>[[B]]</answer>' if Chatbot B is better.\"\n",
    ")\n",
    "\n",
    "REASONING_MULTI_PROMPT_TEMPLATE = (\n",
    "    \"Please act as an impartial judge and evaluate the quality of the responses provided by two AI Chatbots to the Client question displayed below. \\n\\n\"\n",
    "    \"[The Start of the Conversation between Chatbot A and the Client]\\n{conversation_1}\\n[The End of the Conversation between Chatbot A and the Client]\\n\\n\"\n",
    "    \"[The Start of the Conversation between Chatbot B and the Client]\\n{conversation_2}\\n[The End of the Conversation between Chatbot B and the Client]\" + \"\\n\\n\"\n",
    "    \"Output your final verdict at last by strictly following this format: \"\n",
    "    \"'<answer>[[A]]</answer>' if Chatbot A is better, or '<answer>[[B]]</answer>' if Chatbot B is better.\"\n",
    ")\n",
    "\n",
    "# Ignore every warning of category TqdmWarning from here on (presumably to keep\n",
    "# progress-bar notices out of the cell outputs below).\n",
    "import warnings\n",
    "from tqdm import TqdmWarning\n",
    "\n",
    "warnings.filterwarnings(\"ignore\", category=TqdmWarning)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bcabced3",
   "metadata": {},
   "source": [
    "### Example of Using Instruct Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "b9c803a1",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Loading checkpoint shards: 100%|██████████| 6/6 [00:07<00:00,  1.27s/it]\n"
     ]
    }
   ],
   "source": [
    "# Load the Instruct reward model and its tokenizer from the Hugging Face Hub\n",
    "import torch\n",
    "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
    "\n",
    "model_name = \"gaotang/RM-R1-Qwen2.5-Instruct-14B\"\n",
    "\n",
    "# Both settings are left on \"auto\"; pass an explicit device map here if needed.\n",
    "load_kwargs = {\"torch_dtype\": \"auto\", \"device_map\": \"auto\"}\n",
    "\n",
    "tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
    "model = AutoModelForCausalLM.from_pretrained(model_name, **load_kwargs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8d23131e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<type>Chat</type>\n",
      "\n",
      "<rubric>\n",
      "1. Comprehensiveness of information (40%): How thoroughly does the response cover all aspects of car detailing, including both exterior and interior processes?\n",
      "\n",
      "2. Practicality and usefulness (30%): How actionable and practical is the information provided? Does it include specific steps, tools, and techniques that someone could follow?\n",
      "\n",
      "3. Organization and clarity (20%): How well-structured and easy to follow is the information presented?\n",
      "\n",
      "4. Additional value (10%): Does the response include helpful tips, safety considerations, or other valuable information beyond the basic steps?\n",
      "\n",
      "<justify>\n",
      "These criteria focus on evaluating how well each response addresses the question of how to detail a car. The highest weight is given to comprehensiveness because detailing a car involves multiple steps and processes that need to be covered thoroughly. Practicality is weighted second highest because the client needs information they can actually use. Organization is important for making the information accessible, and additional value is included to recognize responses that go beyond the basic requirements.\n",
      "</justify>\n",
      "</rubric>\n",
      "\n",
      "<eval>\n",
      "Comparing the two responses:\n",
      "\n",
      "Comprehensiveness of information:\n",
      "<quote_A>Detailing a car involves a thorough cleaning inside and out, as well as polishing and waxing to protect the vehicle's surfaces. Here's a step-by-step guide to detailing a car:</quote_A>\n",
      "<summary_A>Chatbot A provides a comprehensive guide that covers both exterior and interior detailing in detail. It includes specific steps for washing, drying, clay bar treatment, polishing, waxing, tire and trim dressing, removing trash, vacuuming, shampooing carpets, cleaning hard surfaces, and more.</summary_A>\n",
      "\n",
      "<quote_B>Detailing a car involves washing the exterior and interior of the car, as well as polishing and waxing the exterior. Interior detailing typically involves vacuuming, cleaning the upholstery and air vents, polishing the dashboard and console, and dusting.</quote_B>\n",
      "<summary_B>Chatbot B provides a very basic overview of car detailing, mentioning only the most general aspects of exterior and interior cleaning. It lacks specific steps, techniques, or tools needed for the process.</summary_B>\n",
      "\n",
      "Practicality and usefulness:\n",
      "<quote_A>1. **Wash the Car:**\n",
      "   - Rinse the car with water to remove loose dirt.\n",
      "   - Use a car wash soap and microfiber wash mitt to clean the car from top to bottom.\n",
      "   - Clean the wheels and tires with a brush and a wheel cleaner.\n",
      "   - Rinse the car thoroughly to remove all soap.</quote_A>\n",
      "<summary_A>Chatbot A provides detailed, actionable steps with specific tools and techniques for each stage of the detailing process. It includes specific products like clay bars, polish, wax, and trim restorers, as well as specific cleaning methods for different parts of the car.</summary_A>\n",
      "\n",
      "<quote_B>Polishing and waxing the exterior will depend on the condition of the paint, but typically involves applying a polish and wax to make it shine.</quote_B>\n",
      "<summary_B>Chatbot B offers very general information with no specific steps, tools, or techniques. It doesn't provide actionable information that someone could follow to actually detail their car.</summary_B>\n",
      "\n",
      "Organization and clarity:\n",
      "<quote_A>**Exterior Detailing:**\n",
      "1. **Wash the Car:**\n",
      "   - Rinse the car with water to remove loose dirt.\n",
      "   - Use a car wash soap and microfiber wash mitt to clean the car from top to bottom.\n",
      "   - Clean the wheels and tires with a brush and a wheel cleaner.\n",
      "   - Rinse the car thoroughly to remove all soap.</quote_A>\n",
      "<summary_A>Chatbot A uses a clear, organized structure with bold headings for exterior and interior detailing, numbered steps, and bullet points for specific actions. The information is presented in a logical sequence that someone could easily follow.</summary_A>\n",
      "\n",
      "<quote_B>Detailing a car involves washing the exterior and interior of the car, as well as polishing and waxing the exterior. Interior detailing typically involves vacuuming, cleaning the upholstery and air vents, polishing the dashboard and console, and dusting.</quote_B>\n",
      "<summary_B>Chatbot B presents information in a very basic, unstructured format with no clear organization or step-by-step approach.</summary_B>\n",
      "\n",
      "Additional value:\n",
      "<quote_A>- Work in the shade or a cool, well-ventilated garage to prevent products from drying too quickly and leaving residue.\n",
      "- Use separate buckets for washing and rinsing to avoid contaminating the clean water with dirt.\n",
      "- Always use gentle, non-abrasive materials and cleaners specifically designed for automotive use to avoid damaging surfaces.\n",
      "- Move in a systematic way to ensure you don't miss any spots.</quote_A>\n",
      "<summary_A>Chatbot A includes valuable tips about working conditions, proper techniques, and safety considerations that go beyond the basic steps.</summary_A>\n",
      "\n",
      "Chatbot B provides no additional value beyond the basic information.\n",
      "</eval>\n",
      "\n",
      "<answer>[[A]]</answer>\n"
     ]
    }
   ],
   "source": [
    "# Single Turn Example - from Reward Bench \n",
    "\n",
    "prompt = \"How do I detail a car?\"\n",
    "answer_a = \"Detailing a car involves a thorough cleaning inside and out, as well as polishing and waxing to protect the vehicle's surfaces. Here's a step-by-step guide to detailing a car:\\n\\n**Exterior Detailing:**\\n\\n1. **Wash the Car:**\\n   - Rinse the car with water to remove loose dirt.\\n   - Use a car wash soap and microfiber wash mitt to clean the car from top to bottom.\\n   - Clean the wheels and tires with a brush and a wheel cleaner.\\n   - Rinse the car thoroughly to remove all soap.\\n\\n2. **Dry the Car:**\\n   - Use a microfiber towel or a chamois to dry the car to prevent water spots.\\n\\n3. **Clay Bar Treatment:**\\n   - Use a clay bar with a lubricant to remove embedded surface contaminants from the paint.\\n\\n4. **Polishing:**\\n   - Apply car polish with a dual-action polisher or by hand to correct paint imperfections and create a smooth surface.\\n\\n5. **Waxing:**\\n   - Apply a coat of wax or paint sealant to protect the paint and give it a glossy finish.\\n\\n6. **Windows and Mirrors:**\\n   - Clean the windows and mirrors with a glass cleaner and a microfiber towel.\\n\\n7. **Tire and Trim Dressing:**\\n   - Apply a tire dressing to the tires for a shiny finish.\\n   - Use a trim restorer or protectant on plastic and rubber parts to prevent fading.\\n\\n**Interior Detailing:**\\n\\n1. **Remove Trash:**\\n   - Clear out any trash and remove personal items from the car.\\n\\n2. **Vacuum:**\\n   - Vacuum the seats, carpets, floor mats, and trunk.\\n   - Use a brush attachment for the dashboard and door panels.\\n\\n3. **Shampoo Carpets and Upholstery:**\\n   - Use a carpet cleaner and a brush to clean the carpets and upholstery.\\n   - For leather interiors, use a leather cleaner and conditioner.\\n\\n4. **Clean Hard Surfaces:**\\n   - Wipe down all hard surfaces (dashboard, center console, door panels, etc.) with a mild all-purpose cleaner and a microfiber cloth.\\n\\n5. 
**Windows and Mirrors:**\\n   - Clean the interior side of windows and mirrors.\\n\\n6. **Air Vents and Crevices:**\\n   - Use a detailing brush or compressed air to clean out air vents and hard-to-reach crevices.\\n\\n7. **Final Touches:**\\n   - Apply a protectant to the dashboard and other plastic components.\\n   - Replace air fresheners if needed.\\n\\n**Additional Tips:**\\n\\n- Work in the shade or a cool, well-ventilated garage to prevent products from drying too quickly and leaving residue.\\n- Use separate buckets for washing and rinsing to avoid contaminating the clean water with dirt.\\n- Always use gentle, non-abrasive materials and cleaners specifically designed for automotive use to avoid damaging surfaces.\\n- Move in a systematic way to ensure you don't miss any spots.\\n\\nBy following these steps, you'll give your car a thorough clean that not only makes it look great but also helps in maintaining its value. Remember, regular detailing can prevent wear and tear and keep your car looking new for years to come.\" # Accepted\n",
    "answer_b = \"Detailing a car involves washing the exterior and interior of the car, as well as polishing and waxing the exterior. Interior detailing typically involves vacuuming, cleaning the upholstery and air vents, polishing the dashboard and console, and dusting. Polishing and waxing the exterior will depend on the condition of the paint, but typically involves applying a polish and wax to make it shine.\"  # Rejected\n",
    "\n",
    "# Fill the user-turn template with the question and the two candidate answers.\n",
    "user_prompt_single = INSTRUCT_SINGLE_USER_PROMPT_TEMPLATE.format(\n",
    "    question=prompt,\n",
    "    answer_a=answer_a,\n",
    "    answer_b=answer_b,\n",
    ")\n",
    "\n",
    "# The Instruct model receives its judging instructions through the system message.\n",
    "conversation = [\n",
    "    {\"role\": \"system\", \"content\": INSTRUCT_SYSTEM_PROMPT},\n",
    "    {\"role\": \"user\", \"content\": user_prompt_single},\n",
    "]\n",
    "\n",
    "# return_dict=True makes the tokenizer return the attention mask together with the\n",
    "# token ids, so generate() gets it explicitly instead of having to infer it.\n",
    "inputs = tokenizer.apply_chat_template(\n",
    "    conversation,\n",
    "    tokenize=True,\n",
    "    add_generation_prompt=True,\n",
    "    return_tensors=\"pt\",\n",
    "    return_dict=True,\n",
    ").to(model.device)\n",
    "input_ids = inputs[\"input_ids\"]\n",
    "\n",
    "generation = model.generate(\n",
    "    input_ids=input_ids,\n",
    "    attention_mask=inputs[\"attention_mask\"],\n",
    "    max_new_tokens=8192,  # For optimal benchmarking, set this to unlimited (e.g., 50000)\n",
    "    do_sample=False,  # greedy decoding\n",
    ")\n",
    "\n",
    "# generate() returns prompt + continuation; slice off the prompt before decoding.\n",
    "prompt_length = input_ids.shape[-1]\n",
    "completion = tokenizer.decode(\n",
    "    generation[0][prompt_length:],\n",
    "    skip_special_tokens=True,\n",
    "    clean_up_tokenization_spaces=True,\n",
    ")\n",
    "\n",
    "print(completion)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "79ea328b",
   "metadata": {},
   "source": [
    "### Example of Using Reasoning Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "b323f216",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Loading checkpoint shards: 100%|██████████| 7/7 [00:27<00:00,  3.96s/it]\n"
     ]
    }
   ],
   "source": [
    "# Load the Reasoning reward model and its tokenizer\n",
    "import gc\n",
    "\n",
    "import torch\n",
    "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
    "\n",
    "# Drop the references to any previously loaded model (e.g. the Instruct model loaded\n",
    "# earlier in this notebook) and release cached GPU memory before loading another 14B\n",
    "# checkpoint. Otherwise the old weights stay resident while the new ones are placed.\n",
    "# Plain assignment is used so the cell also runs on a fresh kernel.\n",
    "model = tokenizer = None\n",
    "gc.collect()\n",
    "if torch.cuda.is_available():\n",
    "    torch.cuda.empty_cache()\n",
    "\n",
    "model_name = \"gaotang/RM-R1-DeepSeek-Distilled-Qwen-14B\"\n",
    "model = AutoModelForCausalLM.from_pretrained(\n",
    "    model_name,\n",
    "    torch_dtype=\"auto\",\n",
    "    device_map=\"auto\",  # or specify the specific device map if needed\n",
    ")\n",
    "tokenizer = AutoTokenizer.from_pretrained(model_name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a0ec2d99",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Okay, so I need to evaluate which of the two chatbots, A or B, provided a better response to the client's question about how to detail a car. Let me start by reading both responses carefully.\n",
      "\n",
      "First, Chatbot A's response is quite detailed. It breaks down the process into exterior and interior detailing, each with several steps. The exterior part includes washing, drying, clay bar treatment, polishing, waxing, and cleaning windows. The interior steps cover removing trash, vacuuming, shampooing, cleaning hard surfaces, windows again, air vents, and final touches. There are also additional tips provided, which is helpful. This seems comprehensive and thorough, covering almost every aspect of car detailing.\n",
      "\n",
      "On the other hand, Chatbot B's response is much shorter. It mentions washing the exterior and interior, polishing and waxing the exterior, and some interior steps like vacuuming, cleaning upholstery, polishing the dashboard, and dusting. It also notes that exterior polishing and waxing depend on the paint condition. While it's correct, it's quite brief and doesn't go into the same level of detail as Chatbot A.\n",
      "\n",
      "I think about what a client would expect when asking how to detail a car. They probably want a clear, step-by-step guide that's easy to follow. Chatbot A provides exactly that with each step explained, including tips and why each step is important. It also mentions using the right products and methods, which adds value.\n",
      "\n",
      "Chatbot B, while accurate, is too concise. It doesn't offer the depth or additional tips that would be helpful for someone looking to do a thorough job. The client might feel they're missing important steps or not fully understanding the process after reading B's response.\n",
      "\n",
      "So, considering the thoroughness, detail, and helpfulness, Chatbot A's response is significantly better than Chatbot B's. It educates the client fully on the process, making it easier for them to achieve a professional-level detail on their car.\n",
      "</think>\n",
      "\n",
      "The evaluation of the two chatbots' responses to the question \"How do I detail a car?\" is based on the thoroughness and detail provided. Chatbot A offers a comprehensive, step-by-step guide covering both exterior and interior detailing, along with additional tips, making it highly informative. In contrast, Chatbot B's response is brief and lacks the depth necessary for a complete understanding of the detailing process.\n",
      "\n",
      "<answer>[[A]]</answer>\n"
     ]
    }
   ],
   "source": [
    "\n",
    "# Single Turn Example - from Reward Bench \n",
    "prompt = \"How do I detail a car?\"\n",
    "answer_a = \"Detailing a car involves a thorough cleaning inside and out, as well as polishing and waxing to protect the vehicle's surfaces. Here's a step-by-step guide to detailing a car:\\n\\n**Exterior Detailing:**\\n\\n1. **Wash the Car:**\\n   - Rinse the car with water to remove loose dirt.\\n   - Use a car wash soap and microfiber wash mitt to clean the car from top to bottom.\\n   - Clean the wheels and tires with a brush and a wheel cleaner.\\n   - Rinse the car thoroughly to remove all soap.\\n\\n2. **Dry the Car:**\\n   - Use a microfiber towel or a chamois to dry the car to prevent water spots.\\n\\n3. **Clay Bar Treatment:**\\n   - Use a clay bar with a lubricant to remove embedded surface contaminants from the paint.\\n\\n4. **Polishing:**\\n   - Apply car polish with a dual-action polisher or by hand to correct paint imperfections and create a smooth surface.\\n\\n5. **Waxing:**\\n   - Apply a coat of wax or paint sealant to protect the paint and give it a glossy finish.\\n\\n6. **Windows and Mirrors:**\\n   - Clean the windows and mirrors with a glass cleaner and a microfiber towel.\\n\\n7. **Tire and Trim Dressing:**\\n   - Apply a tire dressing to the tires for a shiny finish.\\n   - Use a trim restorer or protectant on plastic and rubber parts to prevent fading.\\n\\n**Interior Detailing:**\\n\\n1. **Remove Trash:**\\n   - Clear out any trash and remove personal items from the car.\\n\\n2. **Vacuum:**\\n   - Vacuum the seats, carpets, floor mats, and trunk.\\n   - Use a brush attachment for the dashboard and door panels.\\n\\n3. **Shampoo Carpets and Upholstery:**\\n   - Use a carpet cleaner and a brush to clean the carpets and upholstery.\\n   - For leather interiors, use a leather cleaner and conditioner.\\n\\n4. **Clean Hard Surfaces:**\\n   - Wipe down all hard surfaces (dashboard, center console, door panels, etc.) with a mild all-purpose cleaner and a microfiber cloth.\\n\\n5. 
**Windows and Mirrors:**\\n   - Clean the interior side of windows and mirrors.\\n\\n6. **Air Vents and Crevices:**\\n   - Use a detailing brush or compressed air to clean out air vents and hard-to-reach crevices.\\n\\n7. **Final Touches:**\\n   - Apply a protectant to the dashboard and other plastic components.\\n   - Replace air fresheners if needed.\\n\\n**Additional Tips:**\\n\\n- Work in the shade or a cool, well-ventilated garage to prevent products from drying too quickly and leaving residue.\\n- Use separate buckets for washing and rinsing to avoid contaminating the clean water with dirt.\\n- Always use gentle, non-abrasive materials and cleaners specifically designed for automotive use to avoid damaging surfaces.\\n- Move in a systematic way to ensure you don't miss any spots.\\n\\nBy following these steps, you'll give your car a thorough clean that not only makes it look great but also helps in maintaining its value. Remember, regular detailing can prevent wear and tear and keep your car looking new for years to come.\" # Accepted\n",
    "answer_b = \"Detailing a car involves washing the exterior and interior of the car, as well as polishing and waxing the exterior. Interior detailing typically involves vacuuming, cleaning the upholstery and air vents, polishing the dashboard and console, and dusting. Polishing and waxing the exterior will depend on the condition of the paint, but typically involves applying a polish and wax to make it shine.\"  # Rejected\n",
    "\n",
    "# Fill the single-turn template; for the Reasoning model the judging instructions\n",
    "# are part of the user message, so no system message is sent.\n",
    "user_prompt_single = REASONING_SINGLE_PROMPT_TEMPLATE.format(\n",
    "    question=prompt,\n",
    "    answer_a=answer_a,\n",
    "    answer_b=answer_b,\n",
    ")\n",
    "\n",
    "conversation = [\n",
    "    {\"role\": \"user\", \"content\": user_prompt_single},\n",
    "]\n",
    "\n",
    "# return_dict=True makes the tokenizer return the attention mask together with the\n",
    "# token ids. Passing it to generate() avoids the \"attention mask is not set\" warning\n",
    "# that is raised when the pad token is the same as the eos token.\n",
    "inputs = tokenizer.apply_chat_template(\n",
    "    conversation,\n",
    "    tokenize=True,\n",
    "    add_generation_prompt=True,\n",
    "    return_tensors=\"pt\",\n",
    "    return_dict=True,\n",
    ").to(model.device)\n",
    "input_ids = inputs[\"input_ids\"]\n",
    "\n",
    "generation = model.generate(\n",
    "    input_ids=input_ids,\n",
    "    attention_mask=inputs[\"attention_mask\"],\n",
    "    max_new_tokens=8192,  # For optimal benchmarking, set this to unlimited (e.g., 50000)\n",
    "    do_sample=False,  # greedy decoding\n",
    ")\n",
    "\n",
    "# generate() returns prompt + continuation; slice off the prompt before decoding.\n",
    "prompt_length = input_ids.shape[-1]\n",
    "completion = tokenizer.decode(\n",
    "    generation[0][prompt_length:],\n",
    "    skip_special_tokens=True,\n",
    "    clean_up_tokenization_spaces=True,\n",
    ")\n",
    "\n",
    "print(completion)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "RubricRM",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
