{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from src.text_poa_graph import TextPOAGraph\n",
    "from src.new_text_alignment import TextSeqGraphAlignment\n",
    "from src.generation_methods import decode_self_verified\n",
    "import json\n",
    "import pickle\n",
    "import re"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Create CONGRs for the MATH/AIME samples and generate a guided self-verification response for each question."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "from openai import OpenAI  # kept from the original cell; no OpenAI client is needed below\n",
    "\n",
    "# Path to samples file of MATH/AIME. Decoded responses are written back to this same file.\n",
    "SAMPLES_PATH = \"results/MATH/5_samples_math_gen_qwen72b.json\"\n",
    "# Directory that receives one pickled CONGR per question.\n",
    "GRAPH_DIR = \"results/MATH/qwen_graphs\"\n",
    "# Use the same model to self-verify as the one which generated the samples.\n",
    "# We use a together API key to access our models.\n",
    "VERIFICATION_API = \"together\"\n",
    "VERIFICATION_MODEL = \"Qwen/Qwen2.5-72B-Instruct\"\n",
    "UNCERTAINTY_THRESHOLD = 0.7\n",
    "\n",
    "# `questions` (not `input`) so the builtin input() is not shadowed for the rest of the kernel session.\n",
    "with open(SAMPLES_PATH, \"r\", encoding=\"utf-8\") as file:\n",
    "    questions = json.load(file)\n",
    "\n",
    "# Make sure the output directory exists before the first graph is saved.\n",
    "os.makedirs(GRAPH_DIR, exist_ok=True)\n",
    "\n",
    "try:\n",
    "    for question in questions:\n",
    "        samples = [response.strip() for response in question['model_responses']]\n",
    "\n",
    "        # Seed the graph with the first sample, then align and merge every remaining sample into it.\n",
    "        graph = TextPOAGraph(samples[0], label=0)\n",
    "        for label, sample in enumerate(samples[1:], start=1):\n",
    "            alignment = TextSeqGraphAlignment(\n",
    "                text=sample,\n",
    "                graph=graph,\n",
    "                fastMethod=True,\n",
    "                globalAlign=True,\n",
    "                matchscore=1,\n",
    "                mismatchscore=-2,\n",
    "                gap_open=-1,\n",
    "            )\n",
    "            graph.incorporateSeqAlignment(alignment, sample, label=label)\n",
    "\n",
    "        graph.refine_graph(domain=\"math\")\n",
    "\n",
    "        # Save CONGR file in graphs directory\n",
    "        index = question['index']\n",
    "        graph.save_to_pickle(f'{GRAPH_DIR}/qwen72_math_{index}.pkl')\n",
    "\n",
    "        # Only the first element of the returned pair is stored; the second is discarded.\n",
    "        decoded_response, _ = decode_self_verified(\n",
    "            text_poa_graph=graph,\n",
    "            problem=question[\"problem\"],\n",
    "            uncertainty_threshold=UNCERTAINTY_THRESHOLD,\n",
    "            verification_api=VERIFICATION_API,\n",
    "            verification_model=VERIFICATION_MODEL,\n",
    "        )\n",
    "        question['decoded'] = decoded_response\n",
    "finally:\n",
    "    # Written in `finally` so responses decoded before a failure or interrupt are not lost;\n",
    "    # the error, if any, still propagates after the file is saved.\n",
    "    with open(SAMPLES_PATH, \"w\", encoding=\"utf-8\") as outfile:\n",
    "        json.dump(questions, outfile, indent=4)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
