{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from mr_eval.utils.utils import *\n",
    "import os\n",
    "import numpy as np\n",
    "from matplotlib import pyplot as plt\n",
    "from matplotlib import font_manager\n",
    "\n",
    "def list_jsonl_files(folder_path):\n",
    "    \"\"\"\n",
    "    列举文件夹中的所有 .jsonl 文件\n",
    "    Args:\n",
    "        folder_path (str): 文件夹路径\n",
    "    Returns:\n",
    "        List[str]: 所有 .jsonl 文件的路径\n",
    "    \"\"\"\n",
    "    return [f for f in os.listdir(folder_path) if f.endswith(\".jsonl\")]\n",
    "## Model names\n",
    "## Model names\n",
    "prm_model_name_dict = dict(\n",
    "    skyworkprm_1_5B=\"Skywork-1.5B\",\n",
    "    skyworkprm_7B=\"Skywork-PRM-7B\",\n",
    "    llemma7b_prm_prm800k=\"Llemma-PRM800k-7B\",\n",
    "    llemma7b_prm_metamath=\"Llemma-MetaMath-7B\",\n",
    "    llemma7b_oprm_prm800k=\"Llemma-oprm-7B\",\n",
    "    mathminos_mistral=\"MATHMinos-7B\",\n",
    "    mathshepherd=\"MathShepherd-7B\",\n",
    "    reasoneval7b=\"ReasonEval-7B\",\n",
    "    llama3_1_8b_prm_mistral=\"RLHFlow-PRM-Mistral-8B\",\n",
    "    llama3_1_8b_prm_deepseek=\"RLHFlow-PRM-Deepseek-8B\",\n",
    "    reasoneval34b=\"ReasonEval-34B\",\n",
    ")\n",
    "close_model_name_dict = dict(\n",
    "    gpt4o=\"GPT-4o\",\n",
    "    o1mini=\"o1-mini\",\n",
    "    o1preview=\"o1-preview\",\n",
    "    gemini_2_flash=\"Gemini-2.0-flash-exp\",\n",
    "    gemini_2_thinking=\"Gemini-thinking\",\n",
    ")\n",
    "    \n",
    "open_model_name_dict = dict(\n",
    "    qwen_qwq=\"QwQ-Preview-32B\",\n",
    ")\n",
    "all_model_name_dict = {**prm_model_name_dict, **close_model_name_dict, **open_model_name_dict}\n",
    "\n",
    "\n",
    "classification_name_dict = dict(\n",
    "    domain_inconsistency=\"DC.\",\n",
    "    redundency=\"NR.\",\n",
    "    multi_solutions=\"MS.\",\n",
    "    deception=\"DR.\",\n",
    "    confidence=\"CI.\",\n",
    "    step_contradiction=\"SC.\",\n",
    "    circular=\"NCL.\",\n",
    "    missing_condition=\"PS.\",\n",
    "    counterfactual=\"ES.\"\n",
    ")\n",
    "classification_parallel_dict = dict(\n",
    "    simplicity=dict(\n",
    "        redundency=\"NR.\",\n",
    "        circular=\"NCL.\",\n",
    "    ),\n",
    "    soundness=dict(\n",
    "        counterfactual=\"ES.\",\n",
    "        step_contradiction=\"SC.\",\n",
    "        domain_inconsistency=\"DC.\",\n",
    "        confidence=\"CI.\",\n",
    "    ),\n",
    "    sensitivity=dict(\n",
    "        missing_condition=\"PS.\",\n",
    "        deception=\"DR.\",\n",
    "        multi_solutions=\"MS.\",\n",
    "    )\n",
    ")\n",
    "classifications = [\"redundency\", \"circular\", \"counterfactual\", \"step_contradiction\", \"domain_inconsistency\",  \"confidence\", \"missing_condition\", \"deception\", \"multi_solutions\", ]\n",
    "metrics = [\"f1\", \"negative_f1\", \"total_step_acc\", \"correct_step_acc\", \"wrong_step_acc\", \"first_error_acc\", \"similarity\",]\n",
    "\n",
    "## File paths\n",
    "res_dir = \"/mnt/petrelfs/songmingyang/code/reasoning/MR_Hallucination/mr_eval/scripts/logs/prmtest_classified\"\n",
    "res_files = list_jsonl_files(res_dir)\n",
    "res_names = [f.split(\".\")[0] for f in res_files]\n",
    "res_paths = [os.path.join(res_dir, f) for f in res_files]\n",
    "file_dict = dict(zip(res_names, res_paths))\n",
    "res_dict = {k: process_jsonl(v)[-1] for k, v in file_dict.items()}\n",
    "# detailed_log_dict = {k:v[\"\"]}\n",
    "display_models = [\"reasoneval34b\",\"mathshepherd\",\"gpt4o\",\"gemini_2_thinking\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "# load test data\n",
    "data_dir = \"/mnt/petrelfs/songmingyang/code/reasoning/MR_Hallucination/mr_eval/tasks/prmtest_classified/data\"\n",
    "dataset_type = \"dir_of_jsonl\"\n",
    "data_files = os.listdir(data_dir)\n",
    "data_files = [f for f in data_files if f.endswith(\".jsonl\")]\n",
    "raw_data = []\n",
    "for data_file in data_files:\n",
    "    raw_data.extend(process_jsonl(os.path.join(data_dir, data_file)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "# model_dict_construct \n",
    "\n",
    "model_log_dict = {}\n",
    "for model_name, model_res in res_dict.items():\n",
    "    detailed_log = model_res[\"detailed_logs\"]\n",
    "    model_log_dict[model_name] = {}\n",
    "    for log in detailed_log:\n",
    "        if  \"validitiy\" in log and not log[\"validitiy\"]:\n",
    "            continue\n",
    "        idx = log[\"idx\"]\n",
    "        wrong_step_acc_list = log[\"results\"][\"wrong_step_acc_list\"]   \n",
    "        \n",
    "        error_num = 0\n",
    "        for err in wrong_step_acc_list:\n",
    "            if err < 1:\n",
    "                error_num += 1\n",
    "                \n",
    "        res = dict(log=log,error_num=error_num)\n",
    "        model_log_dict[model_name][idx] = res\n",
    "        \n",
    "\n",
    "# build error cnt dict\n",
    "meta_data_dict = {}\n",
    "for item in raw_data:\n",
    "    idx = item[\"idx\"]\n",
    "    classification = item[\"classification\"]\n",
    "    new_idx = f\"{classification}_{idx}\"\n",
    "    total_error_cnt = 0\n",
    "    for model_name in model_log_dict.keys():\n",
    "        if new_idx in model_log_dict[model_name]:\n",
    "            total_error_cnt += model_log_dict[model_name][new_idx][\"error_num\"]\n",
    "    meta_data_dict[new_idx] = dict(item=item, error_cnt=total_error_cnt,new_idx=new_idx)\n",
    "    \n",
    "raw_data_w_error = list(meta_data_dict.values())    \n",
    "raw_data_w_error.sort(key=lambda x: x[\"error_cnt\"], reverse=True)\n",
    "\n",
    "\n",
    "def get_split_str(string,max_len=12):\n",
    "    # return string\n",
    "    str_split = string.split()\n",
    "    if len(str_split) <= max_len:\n",
    "        return string\n",
    "    \n",
    "    result = [' '.join(str_split[i:i+max_len]) for i in range(0, len(str_split), max_len)]  # 每 n 个单词分成一组\n",
    "    return ' \\\\\\\\ '.join(result)   \n",
    "\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\\textbf{Question} & \\tabincell{c}{Compute \\[\\left( 1 + \\cos \\frac {\\pi}{8} \\right) \\left( 1 + \\cos \\\\ \\frac {3 \\pi}{8} \\right) \\left( 1 + \\cos \\frac {5 \\pi}{8} \\right) \\\\ \\left( 1 + \\cos \\frac {7 \\pi}{8} \\right).\\]}& \\textbf{ReasonEval-34B} & \\textbf{MathShepherd-7B} & \\textbf{GPT-4o} & \\textbf{Gemini-thinking} \\\\\n",
      "\\textbf{Step 1} & \\tabincell{c}{Let's call the expression we're trying to compute $x$.} & \\textcolor{mygreen}{\\ding{51}}& \\textcolor{mygreen}{89.1}& \\textcolor{mygreen}{87.5}& \\textcolor{mygreen}{100.0}& \\textcolor{mygreen}{100.0}\\\\\n",
      "\\textbf{Step 2} & \\tabincell{c}{Use the fact that $\\cos(\\pi-a)=-\\cos a$.} & \\textcolor{mygreen}{\\ding{51}}& \\textcolor{mygreen}{77.5}& \\textcolor{mygreen}{82.8}& \\textcolor{mygreen}{100.0}& \\textcolor{mygreen}{100.0}\\\\\n",
      "\\textbf{Step 3} & \\tabincell{c}{Then we have $x=\\left( 1 + \\cos \\frac {\\pi}{8} \\right) \\left( 1 \\\\ + \\cos \\frac {3 \\pi}{8} \\right) \\left( 1 + \\cos \\frac {5 \\\\ \\pi}{8} \\right) \\left( 1 + \\cos \\frac {7 \\pi}{8} \\right)$.} & \\textcolor{mygreen}{\\ding{51}}& \\textcolor{mygreen}{71.4}& \\textcolor{mygreen}{76.7}& \\textcolor{mygreen}{100.0}& \\textcolor{mygreen}{100.0}\\\\\n",
      "\\textbf{Step 4} & \\tabincell{c}{This equals $\\left( 1 + \\cos \\frac {\\pi}{8} \\right) \\left( 1 + \\\\ \\cos \\frac {3 \\pi}{8} \\right) \\left( 1 - \\cos \\frac {\\pi}{8} \\right) \\\\ \\left( 1 - \\cos \\frac {3 \\pi}{8} \\right)$.} & \\textcolor{mygreen}{\\ding{51}}& \\textcolor{myred}{20.5}& \\textcolor{mygreen}{76.9}& \\textcolor{mygreen}{100.0}& \\textcolor{mygreen}{100.0}\\\\\n",
      "\\textbf{Step 5} & \\tabincell{c}{By the difference of squares, this equals $\\left(1-\\cos^2 \\frac{\\pi}{8}\\right)\\left(1-\\cos^2 \\frac{3\\pi}{8}\\right)$.} & \\textcolor{mygreen}{\\ding{51}}& \\textcolor{mygreen}{56.0}& \\textcolor{mygreen}{83.7}& \\textcolor{mygreen}{100.0}& \\textcolor{mygreen}{100.0}\\\\\n",
      "\\textbf{Step 6} & \\tabincell{c}{And by the Pythagorean identity, this equals $\\left(\\sin^2 \\frac{\\pi}{8}\\right)\\left(\\sin^2 \\frac{3\\pi}{8}\\right)$.} & \\textcolor{mygreen}{\\ding{51}}& \\textcolor{mygreen}{63.5}& \\textcolor{mygreen}{80.4}& \\textcolor{mygreen}{100.0}& \\textcolor{mygreen}{100.0}\\\\\n",
      "\\textbf{Step 7} & \\tabincell{c}{Ok, now we just need to compute the sines.} & \\textcolor{mygreen}{\\ding{51}}& \\textcolor{mygreen}{84.1}& \\textcolor{mygreen}{79.1}& \\textcolor{myred}{50.0}& \\textcolor{myred}{50.0}\\\\\n",
      "\\textbf{Step 8} & \\tabincell{c}{We can use the formula for the sum of sines.} & \\textcolor{myred}{\\ding{55}}& \\textcolor{mygreen}{73.6}& \\textcolor{mygreen}{74.1}& \\textcolor{myred}{0.0}& \\textcolor{myred}{0.0}\\\\\n",
      "\\textbf{Step 9} & \\tabincell{c}{That formula is $\\sin(a)+\\sin(b) = 2\\sin\\left(\\frac{a+b}{2}\\right)\\cos\\left(\\frac{a-b}{2}\\right)$.} & \\textcolor{myred}{\\ding{55}}& \\textcolor{mygreen}{79.1}& \\textcolor{mygreen}{78.5}& \\textcolor{myred}{0.0}& \\textcolor{mygreen}{100.0}\\\\\n",
      "\\textbf{Step 10} & \\tabincell{c}{That means that $\\sin\\frac{\\pi}{8} + \\sin\\frac{3\\pi}{8} = 2\\sin\\left(\\frac{\\pi/8+3\\pi/8}{2}\\right)\\cos\\left(\\frac{\\pi/8-3\\pi/8}{2}\\right)$.} & \\textcolor{myred}{\\ding{55}}& \\textcolor{mygreen}{67.4}& \\textcolor{mygreen}{78.0}& \\textcolor{myred}{0.0}& \\textcolor{myred}{0.0}\\\\\n",
      "\\textbf{Step 11} & \\tabincell{c}{This equals $2\\sin\\frac{\\pi/2}{2}\\cos(-\\frac{\\pi/4})$.} & \\textcolor{myred}{\\ding{55}}& \\textcolor{mygreen}{68.1}& \\textcolor{mygreen}{72.1}& \\textcolor{myred}{0.0}& \\textcolor{mygreen}{100.0}\\\\\n",
      "\\textbf{Step 12} & \\tabincell{c}{This equals $2\\cdot 1 \\cdot \\frac{\\sqrt{2}}{2}$.} & \\textcolor{myred}{\\ding{55}}& \\textcolor{myred}{49.2}& \\textcolor{mygreen}{73.8}& \\textcolor{myred}{0.0}& \\textcolor{mygreen}{100.0}\\\\\n",
      "\\textbf{Step 13} & \\tabincell{c}{This equals $\\sqrt{2}$.} & \\textcolor{myred}{\\ding{55}}& \\textcolor{myred}{35.1}& \\textcolor{mygreen}{73.8}& \\textcolor{myred}{0.0}& \\textcolor{mygreen}{100.0}\\\\\n",
      "\\textbf{Step 14} & \\tabincell{c}{But this step doesn't help us compute the original product of sines.} & \\textcolor{myred}{\\ding{55}}& \\textcolor{mygreen}{72.5}& \\textcolor{mygreen}{64.3}& \\textcolor{myred}{-50.0}& \\textcolor{mygreen}{100.0}\\\\\n",
      "\\textbf{Step 15} & \\tabincell{c}{So, $\\left(\\sin^2 \\frac{\\pi}{8}\\right)\\left(\\sin^2 \\frac{3\\pi}{8}\\right)$ remains the same and $x=\\frac{1}{8}$.} & \\textcolor{myred}{\\ding{55}}& \\textcolor{myred}{6.3}& \\textcolor{myred}{35.8}& \\textcolor{myred}{0.0}& \\textcolor{myred}{-100.0}\\\\\n",
      "\\textbf{Step 16} & \\tabincell{c}{Therefore, the mistake didn't change the value of $x$.} & \\textcolor{mygreen}{\\ding{51}}& \\textcolor{myred}{22.6}& \\textcolor{myred}{43.5}& \\textcolor{myred}{-100.0}& \\textcolor{myred}{-100.0}\\\\\n",
      "\\textbf{Final Acc.} & - & -   & 56.2& 50.0& 93.8& 62.5\\\\\n",
      "\n"
     ]
    }
   ],
   "source": [
    "## Form Latex str\n",
    "select_idx = 6\n",
    "max_single_line_Words = 16\n",
    "item = raw_data_w_error[select_idx][\"item\"]\n",
    "item_idx = raw_data_w_error[select_idx][\"new_idx\"]\n",
    "error_steps = item[\"error_steps\"]\n",
    "res_str = \"\"\n",
    "question = get_split_str(item[\"modified_question\"])\n",
    "question_str = f\"\\\\textbf{{Question}} & \\\\tabincell{{c}}{{{question}}}\"\n",
    "# question_str = f\"\\\\textbf{{Question}} & {question}\"\n",
    "for model_name in display_models:\n",
    "    model_display_name = all_model_name_dict[model_name]\n",
    "    question_str += f\"& \\\\textbf{{{model_display_name}}} \"\n",
    "question_str += \"\\\\\\\\\\n\"\n",
    "res_str += question_str\n",
    "\n",
    "for step_idx, step in enumerate(item[\"modified_process\"]):\n",
    "    adjusted_step_str  = get_split_str(step)\n",
    "    # if step_idx + 1 in error_steps:\n",
    "    #     adjusted_step_str = f\"\\\\textcolor{{myred}}{{{adjusted_step_str}}}\"\n",
    "    if step_idx + 1 in error_steps:\n",
    "        label = f\"\\\\textcolor{{myred}}{{\\\\ding{{55}}}}\"\n",
    "    else:\n",
    "        label = f\"\\\\textcolor{{mygreen}}{{\\\\ding{{51}}}}\"\n",
    "        \n",
    "    step_str = f\"\\\\textbf{{Step {step_idx + 1}}} & \\\\tabincell{{c}}{{{adjusted_step_str}}} & {label}\"\n",
    "    # step_str = f\"\\\\textbf{{Step {step_idx + 1}}} & {step}\"\n",
    "\n",
    "    for model_name in display_models:\n",
    "        model_log = model_log_dict[model_name][item_idx][\"log\"]\n",
    "        validity_score = model_log[\"prediction\"][\"scores\"][\"step_level_validity_scores\"]\n",
    "        current_step_validity_score = validity_score[step_idx]\n",
    "        if current_step_validity_score > 0.5:\n",
    "            step_str += f\"& \\\\textcolor{{mygreen}}{{{current_step_validity_score*100:.1f}}}\"\n",
    "        else:\n",
    "            step_str += f\"& \\\\textcolor{{myred}}{{{current_step_validity_score*100:.1f}}}\"\n",
    "    step_str += \"\\\\\\\\\\n\"\n",
    "    res_str += step_str\n",
    "\n",
    "final_acc_str = f\"\\\\textbf{{Final Acc.}} & - & -   \"\n",
    "for model_name in display_models:\n",
    "    model_log = model_log_dict[model_name][item_idx][\"log\"]\n",
    "    total_acc = model_log[\"results\"][\"total_step_acc\"]\n",
    "    final_acc_str += f\"& {total_acc*100:.1f}\"\n",
    "final_acc_str += \"\\\\\\\\\\n\"\n",
    "res_str += final_acc_str\n",
    "\n",
    "\n",
    "print(res_str)\n",
    "        \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'item': {'original_question': 'Compute \\\\[\\\\left( 1 + \\\\cos \\\\frac {\\\\pi}{8} \\\\right) \\\\left( 1 + \\\\cos \\\\frac {3 \\\\pi}{8} \\\\right) \\\\left( 1 + \\\\cos \\\\frac {5 \\\\pi}{8} \\\\right) \\\\left( 1 + \\\\cos \\\\frac {7 \\\\pi}{8} \\\\right).\\\\]',\n",
       "  'modified_question': 'Compute \\\\[\\\\left( 1 + \\\\cos \\\\frac {\\\\pi}{8} \\\\right) \\\\left( 1 + \\\\cos \\\\frac {3 \\\\pi}{8} \\\\right) \\\\left( 1 + \\\\cos \\\\frac {5 \\\\pi}{8} \\\\right) \\\\left( 1 + \\\\cos \\\\frac {7 \\\\pi}{8} \\\\right).\\\\]',\n",
       "  'original_process': [\"Let's call the expression we're trying to compute $x$.\",\n",
       "   'Use the fact that $\\\\cos(\\\\pi-a)=-\\\\cos a$.',\n",
       "   'Then we have $x=\\\\left( 1 + \\\\cos \\\\frac {\\\\pi}{8} \\\\right) \\\\left( 1 + \\\\cos \\\\frac {3 \\\\pi}{8} \\\\right) \\\\left( 1 + \\\\cos \\\\frac {5 \\\\pi}{8} \\\\right) \\\\left( 1 + \\\\cos \\\\frac {7 \\\\pi}{8} \\\\right)$.',\n",
       "   'This equals $\\\\left( 1 + \\\\cos \\\\frac {\\\\pi}{8} \\\\right) \\\\left( 1 + \\\\cos \\\\frac {3 \\\\pi}{8} \\\\right) \\\\left( 1 - \\\\cos \\\\frac {\\\\pi}{8} \\\\right) \\\\left( 1 - \\\\cos \\\\frac {3 \\\\pi}{8} \\\\right)$.',\n",
       "   'By the difference of squares, this equals $\\\\left(1-\\\\cos^2 \\\\frac{\\\\pi}{8}\\\\right)\\\\left(1-\\\\cos^2 \\\\frac{3\\\\pi}{8}\\\\right)$.',\n",
       "   'And by the Pythagorean identity, this equals $\\\\left(\\\\sin^2 \\\\frac{\\\\pi}{8}\\\\right)\\\\left(\\\\sin^2 \\\\frac{3\\\\pi}{8}\\\\right)$.',\n",
       "   'Ok, now we just need to compute the sines.',\n",
       "   'We can use the formula for the product of sines.',\n",
       "   'That formula is $\\\\sin(a)\\\\sin(b) = \\\\frac{1}{2}\\\\left(\\\\cos(a-b)-\\\\cos(a+b)\\\\right)$.',\n",
       "   'That means that $\\\\sin\\\\frac{\\\\pi}{8}\\\\sin\\\\frac{3\\\\pi}{8} = \\\\frac{1}{2}\\\\left(\\\\cos\\\\frac{2\\\\pi}{8}-\\\\cos\\\\frac{4\\\\pi}{8}\\\\right)$.',\n",
       "   'This equals $\\\\frac{1}{2}\\\\left(\\\\cos\\\\frac{\\\\pi}{4}-\\\\cos\\\\frac{\\\\pi}{2}\\\\right)$.',\n",
       "   'This equals $\\\\frac{1}{2}\\\\left(\\\\frac{\\\\sqrt{2}}{2}-0\\\\right)$.',\n",
       "   'This equals $\\\\frac{\\\\sqrt{2}}{4}$.',\n",
       "   'So, $\\\\left(\\\\sin^2 \\\\frac{\\\\pi}{8}\\\\right)\\\\left(\\\\sin^2 \\\\frac{3\\\\pi}{8}\\\\right) = \\\\left(\\\\frac{\\\\sqrt{2}}{4}\\\\right)^2 = \\\\frac{1}{8}$.',\n",
       "   'Therefore, $x=\\\\frac{1}{8}$.'],\n",
       "  'modified_process': [\"Let's call the expression we're trying to compute $x$.\",\n",
       "   'Use the fact that $\\\\cos(\\\\pi-a)=-\\\\cos a$.',\n",
       "   'Then we have $x=\\\\left( 1 + \\\\cos \\\\frac {\\\\pi}{8} \\\\right) \\\\left( 1 + \\\\cos \\\\frac {3 \\\\pi}{8} \\\\right) \\\\left( 1 + \\\\cos \\\\frac {5 \\\\pi}{8} \\\\right) \\\\left( 1 + \\\\cos \\\\frac {7 \\\\pi}{8} \\\\right)$.',\n",
       "   'This equals $\\\\left( 1 + \\\\cos \\\\frac {\\\\pi}{8} \\\\right) \\\\left( 1 + \\\\cos \\\\frac {3 \\\\pi}{8} \\\\right) \\\\left( 1 - \\\\cos \\\\frac {\\\\pi}{8} \\\\right) \\\\left( 1 - \\\\cos \\\\frac {3 \\\\pi}{8} \\\\right)$.',\n",
       "   'By the difference of squares, this equals $\\\\left(1-\\\\cos^2 \\\\frac{\\\\pi}{8}\\\\right)\\\\left(1-\\\\cos^2 \\\\frac{3\\\\pi}{8}\\\\right)$.',\n",
       "   'And by the Pythagorean identity, this equals $\\\\left(\\\\sin^2 \\\\frac{\\\\pi}{8}\\\\right)\\\\left(\\\\sin^2 \\\\frac{3\\\\pi}{8}\\\\right)$.',\n",
       "   'Ok, now we just need to compute the sines.',\n",
       "   'We can use the formula for the sum of sines.',\n",
       "   'That formula is $\\\\sin(a)+\\\\sin(b) = 2\\\\sin\\\\left(\\\\frac{a+b}{2}\\\\right)\\\\cos\\\\left(\\\\frac{a-b}{2}\\\\right)$.',\n",
       "   'That means that $\\\\sin\\\\frac{\\\\pi}{8} + \\\\sin\\\\frac{3\\\\pi}{8} = 2\\\\sin\\\\left(\\\\frac{\\\\pi/8+3\\\\pi/8}{2}\\\\right)\\\\cos\\\\left(\\\\frac{\\\\pi/8-3\\\\pi/8}{2}\\\\right)$.',\n",
       "   'This equals $2\\\\sin\\\\frac{\\\\pi/2}{2}\\\\cos(-\\\\frac{\\\\pi/4})$.',\n",
       "   'This equals $2\\\\cdot 1 \\\\cdot \\\\frac{\\\\sqrt{2}}{2}$.',\n",
       "   'This equals $\\\\sqrt{2}$.',\n",
       "   \"But this step doesn't help us compute the original product of sines.\",\n",
       "   'So, $\\\\left(\\\\sin^2 \\\\frac{\\\\pi}{8}\\\\right)\\\\left(\\\\sin^2 \\\\frac{3\\\\pi}{8}\\\\right)$ remains the same and $x=\\\\frac{1}{8}$.',\n",
       "   \"Therefore, the mistake didn't change the value of $x$.\"],\n",
       "  'modified_steps': [8, 9, 10, 11, 12, 13, 14, 15],\n",
       "  'error_steps': [8, 9, 10, 11, 12, 13, 14, 15],\n",
       "  'reason': 'A counterfactual step was introduced in steps 8 through 13 by mistakenly using the formula for the sum of sines instead of the product of sines. This leads to incorrect intermediate calculations. However, due to fortunate errors, the end result ironically matches the correct answer in step 15.',\n",
       "  'idx': 'prm_train_p1_53',\n",
       "  'question': 'Compute\\n\\\\[\\\\left( 1 + \\\\cos \\\\frac {\\\\pi}{8} \\\\right) \\\\left( 1 + \\\\cos \\\\frac {3 \\\\pi}{8} \\\\right) \\\\left( 1 + \\\\cos \\\\frac {5 \\\\pi}{8} \\\\right) \\\\left( 1 + \\\\cos \\\\frac {7 \\\\pi}{8} \\\\right).\\\\]',\n",
       "  'classification': 'counterfactual'},\n",
       " 'error_cnt': 88,\n",
       " 'new_idx': 'counterfactual_prm_train_p1_53'}"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "raw_data_w_error[select_idx]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "smoe",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
