{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pickle as pkl\n",
    "import os\n",
    "\n",
    "# Path of the pickled predictions written by one few-shot CoT run.\n",
    "cot_path = '/home/XXXX/XXXX/fs_backup_feb13/LLM-project/preds/FewShotCOTCLUTRR_Thu_Jan__2_00.41.44_2025_iter2'\n",
    "\n",
    "# allow_pickle=True can execute code stored in the file: only load files you trust.\n",
    "# The context manager closes the handle that the bare open(...) call used to leak.\n",
    "with open(cot_path, 'rb') as cot_file:\n",
    "    cot = np.load(cot_file, allow_pickle=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "len(cot)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "from datasets import load_dataset\n",
    "\n",
    "# Access requires a Hugging Face login (e.g. `huggingface-cli login`).\n",
    "# Only the 'train' split of the FOLIO dataset is kept.\n",
    "folio_og = load_dataset(\"yale-nlp/FOLIO\")['train']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import json  # local import: json is otherwise only imported by a later cell\n",
    "\n",
    "# Parse inside a context manager so the file handle is closed afterwards.\n",
    "with open('//home/XXXX/XXXX/fs_backup_feb13/SAT-LM/data/folio_proofd5_test.json', 'r') as folio_syn_file:\n",
    "    folio_syn = json.load(folio_syn_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Index the FOLIO rows by 'example_id'; a repeated id keeps the last row seen.\n",
    "folio_new = {f['example_id']: f for f in folio_og}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# `names` is filled by the solver-results cell further down; that cell has to run first.\n",
    "folio_new_write = []\n",
    "for name in names:\n",
    "    entry = folio_new[folio_syn[name]['example_id']]\n",
    "    # Mirror 'premises' (split on newlines) under 'context' and 'conclusion' under 'question'.\n",
    "    entry['context'] = entry['premises'].split('\\n')\n",
    "    entry['question'] = entry['conclusion']\n",
    "    folio_new_write.append(entry)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import json  # local import: json is otherwise only imported by a later cell\n",
    "\n",
    "# Write inside a context manager so the output is flushed and the file closed deterministically.\n",
    "with open('/home/XXXX/XXXX/fs_backup_feb13/SAT-LM/data/folio_new.json', 'w') as folio_new_file:\n",
    "    json.dump(folio_new_write, folio_new_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import shutil\n",
    "\n",
    "def get_bb(file, del_sols=None):\n",
    "    \"\"\"Collect the backbone literals of the 'pos_' and 'neg_' variants of a CNF file.\n",
    "\n",
    "    For `file` = <dir>/<name>, the files <dir>/pos_<name> and <dir>/neg_<name> are\n",
    "    copied to <parent of dir>/tempfiles/, optionally extended with one line per\n",
    "    entry of `del_sols`, and handed to the external `cadiback` binary. Every literal\n",
    "    on an output line that starts with 'b' is collected; the literal '0' is skipped.\n",
    "\n",
    "    Args:\n",
    "        file: Path <dir>/<name>. <name> is expected to end in a 4-character\n",
    "            extension such as '.cnf', because the output name is built with [:-4].\n",
    "        del_sols: Optional mapping with keys 'pos' and 'neg', each an iterable of\n",
    "            solutions (iterables of int literals). Each solution is appended to the\n",
    "            matching temp file as a line holding the negation of all its literals.\n",
    "\n",
    "    Returns:\n",
    "        dict with keys 'pos' and 'neg', each a list of int literals.\n",
    "    \"\"\"\n",
    "    import subprocess  # function-scope import: the notebook does not import it elsewhere\n",
    "\n",
    "    cnf_dir, cnf_name = os.path.split(file)\n",
    "    temp_dir = os.path.join(os.path.dirname(cnf_dir), 'tempfiles')\n",
    "    bb = {'pos': [], 'neg': []}\n",
    "\n",
    "    # Loop over the polarity itself. The original searched the whole path for the\n",
    "    # substrings 'pos'/'neg', which picks the wrong bucket when a directory name contains them.\n",
    "    for polarity in ('pos', 'neg'):\n",
    "        variant_name = polarity + '_' + cnf_name\n",
    "        temp_cnf = os.path.join(temp_dir, variant_name)\n",
    "        temp_bbone = os.path.join(temp_dir, variant_name[:-4] + '.bbone')\n",
    "        shutil.copy(os.path.join(cnf_dir, variant_name), temp_cnf)\n",
    "\n",
    "        if del_sols is not None:\n",
    "            for sol in del_sols[polarity]:\n",
    "                # add_clause is defined outside this notebook; presumably it updates the\n",
    "                # clause count in the DIMACS header -- TODO confirm.\n",
    "                add_clause(temp_cnf)\n",
    "                with open(temp_cnf, 'a') as cf:\n",
    "                    # One line with the negation of every literal (no terminating '0', as before).\n",
    "                    cf.write('\\n' + ''.join(str(-lit) + ' ' for lit in sol))\n",
    "\n",
    "        # Argument list plus an explicit stdout file instead of a shell string:\n",
    "        # paths containing spaces or shell metacharacters can no longer break the command.\n",
    "        with open(temp_bbone, 'w') as bbone_out:\n",
    "            subprocess.run(\n",
    "                ['timeout', '5000',\n",
    "                 '/home/XXXX/XXXX/fs_backup_feb13/LLM-project/cadiback/cadiback',\n",
    "                 temp_cnf],\n",
    "                stdout=bbone_out, check=False)\n",
    "\n",
    "        with open(temp_bbone, 'r') as bbone:\n",
    "            for line in bbone:\n",
    "                if not line.startswith('b'):\n",
    "                    continue\n",
    "                # split() without arguments tolerates repeated spaces and the trailing newline.\n",
    "                for lit in line.split()[1:]:\n",
    "                    if lit == '0':\n",
    "                        continue\n",
    "                    bb[polarity].append(int(lit))\n",
    "\n",
    "    return bb\n",
    "\n",
    "import csv\n",
    "import json\n",
    "\n",
    "# `c` stays the CSV path; the file itself is opened (and closed) by the with-block below.\n",
    "c = '/home/XXXX/XXXX/fs_backup_feb13/LLM-project/dimacs_csvs/solver_finished.csv'\n",
    "dataset = '/home/XXXX/XXXX/fs_backup_feb13/SAT-LM/data/folio_proofd5_test.json'\n",
    "dimacs_dir = '/home/XXXX/XXXX/fs_backup_feb13/LLM-project/dimacs/'\n",
    "with open(dataset, 'r') as df:\n",
    "    data = json.load(df)\n",
    "\n",
    "task = 'folio'\n",
    "nameed = False\n",
    "names = []        # integer index parsed from each kept file name\n",
    "all_outs = {}\n",
    "nameed_list = []\n",
    "labels = {}       # kept file name -> data[index]['label']\n",
    "\n",
    "# Context managers close the CSV and every CNF file; the original left all of them open.\n",
    "with open(c, 'r', newline='') as solver_csv:\n",
    "    for row in csv.reader(solver_csv):\n",
    "        # Keep only rows whose columns 2 and 3 both read 'SAT'.\n",
    "        if row[2] != 'SAT' or row[3] != 'SAT':\n",
    "            continue\n",
    "        # First line of the neg_ CNF; its last space-separated field is parsed as an int.\n",
    "        with open(dimacs_dir + 'neg_' + row[1]) as cnf_file:\n",
    "            cnf = cnf_file.readline().strip('\\n')\n",
    "        num_clause = int(cnf.split(' ')[-1])  # only needed by the disabled size filter below\n",
    "\n",
    "        if task == 'folio':\n",
    "            bb = get_bb(dimacs_dir + row[1])\n",
    "            # Skip problems whose pos and neg variants share no backbone literal.\n",
    "            jb = set(bb['pos']).intersection(bb['neg'])\n",
    "            if len(jb) == 0:\n",
    "                continue\n",
    "        # if num_clause > 500:\n",
    "            # continue\n",
    "        example_idx = int(row[1].split('proofd5')[1].split('.cnf')[0])\n",
    "        names.append(example_idx)\n",
    "        labels[row[1]] = data[example_idx]['label']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(len(labels))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): scratch expression that reads `row`, the last CSV row left over from the loop above.\n",
    "# It splits on 'clutrr' while that loop splits the same field on 'proofd5' -- confirm this cell is still needed.\n",
    "int(row[1].split('clutrr')[1].split('.cnf')[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Compare prediction i with the i-th label (in `labels` insertion order) and\n",
    "# record per problem whether the prediction matched.\n",
    "cot_preds = {}\n",
    "for i, (key, value) in enumerate(labels.items()):\n",
    "    cot_preds[key] = True if cot[i] == value else False\n",
    "cot_acc = sum(1 for hit in cot_preds.values() if hit)\n",
    "print(cot_acc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Few-shot prompt made of four worked examples, each terminated by '***'.\n",
    "# Fixed here: missing backslash in the first 'Facts:\\n', misplaced newline in step 3 of the\n",
    "# third example, and the typos '[Dales]', 'sister if' and 'ot true'.\n",
    "few_shot = \"Facts:\\n[Nancy] likes to cut the hair of her daughter [Heidi].\\n[Heidi]'s sister [Lorraine] went to beauty school and taught them all how to cut hair expertly. \" + \\\n",
    "            \"\\nHere are some additional facts and rules we\\'ve found:\\nNancy is the mother of Lorraine\\n If Heidi is the sister of Lorraine and Heidi is the daughter of Nancy then Nancy is the mother of Lorraine.\\n\" + \\\n",
    "            \"Question: Is the following statement true: \\n\\\"[Lorraine] is [Nancy]\\'s daughter\\\"\\nAnswer: Let\\'s think step by step. \\n1. We have already found that Nancy is the mother of Lorraine.\\n2. If Nancy is the mother of Lorraine, then Lorraine is the daughter of Nancy.\\nTherefore, the answer to the question is Yes, the statement is true. \\n***\\n\" + \\\n",
    "            \"Facts:\\n[Dale] and his sister [Nancy] are decorating for a party.\\n[Nancy]'s daughter [Louise] thinks the party will be fun.\\n\" + \\\n",
    "            \"Here are some additional facts and rules we\\'ve found:\\nDale is the uncle of Louise. If Nancy is the sister of Dale and Nancy is the mother of Louise then Dale is the uncle of Louise.\\n\" + \\\n",
    "            \"Question: Is the following statement true: \\n\\\"[Louise] is not [Dale]\\'s niece\\\"\\n\" + \\\n",
    "            \"Answer: Let\\'s think step by step. 1. We are given that Dale is the uncle of Louise.\\n2.If Dale is the uncle of Louise, then Louise is the niece of Dale.\\nTherefore, the answer is No, the statement is not true.\\n***\\n\" + \\\n",
    "            \"Facts: \\n[Lillian] and her sister [Nancy] are the only children in their family. \\n[Lillian]'s biggest accomplishment is raising her son [Douglas]. \" + \\\n",
    "            \"\\nHere are some additional facts and rules we\\'ve found:\\nLillian is the sister of Nancy. \\nIf Nancy is the sister of Lillian then Lillian is the sister of Nancy.\\n\" + \\\n",
    "            \"Question: Is the following statement true: \\n\\\"[Douglas] is [Nancy]\\'s nephew\\\"\\nAnswer: Let\\'s think step by step. \\n1. [Douglas] is [Lillian]\\'s son. \\n2. [Nancy] is [Lillian]\\'s sister. \" + \\\n",
    "            \"\\n3. [Douglas] is [Nancy]\\'s nephew. \\nTherefore, the answer to the question is Yes, the statement is true. \\n***\\n\" + \\\n",
    "            \"Facts: \\n[Ashley] liked to go to the park with her granddaughter [Charlotte]. \\n[Dale], [Charlotte]'s father, like to take her to the movies instead. \" + \\\n",
    "            \"\\nHere are some additional facts and rules we\\'ve found:\\nDale is the son of Ashley. If Dale is father of Charlotte and Ashley is the grandmother of Charlotte then Dale is the son of Ashley.\\n\" + \\\n",
    "            \"Question: Is the following statement true: \\n\\\"[Ashley] is not [Dale]\\'s mother\\\"\\nAnswer: Let\\'s think step by step. \\n1. We are given that Dale is the son of Ashley. \\n2. If Dale is the son of Ashley, then Ashley is the mother of Dale. \" + \\\n",
    "            \"\\nTherefore, the answer to the question is No, the statement is not true.\\n***\\n\"\n",
    "\n",
    "# Append a dummy completion and check that chunk 4 of the '***' split is that completion.\n",
    "ans = few_shot + 'a;sldkfj;alskdjf***'\n",
    "print(ans.split('***')[4])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "a = 'grandson_of_james\\'_sibling_James_Donald_'\n",
    "# Swap the '_' separators for '-' and drop the empty piece left by the trailing '_'.\n",
    "pieces = a.split('_')[:-1]\n",
    "rel_str = '-'.join(pieces)\n",
    "print(rel_str)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "import copy\n",
    "# cot_iter = '/home/XXXX/XXXX/fs_backup_feb13/LLM-project/preds/LOT_folio_8B_preds_iter'\n",
    "# cot_iter = '/home/XXXX/XXXX/fs_backup_feb13/LLM-project/preds/FewShotCOTFOLIO_Wed_Jan__1_06.25.21_2025_iter'\n",
    "# cot_iter ='/home/XXXX/XXXX/fs_backup_feb13/LLM-project/preds/LOT_folio_8B_preds_iter'\n",
    "# cot_iter = '/home/XXXX/XXXX/fs_backup_feb13/LLM-project/preds/mistral_FewShotCOTFOLIO_Tue_Apr_29_13.38.54_2025_iter'\n",
    "# cot_iter = '/home/XXXX/XXXX/fs_backup_feb13/LLM-project/preds/LOT_folio_M7B_preds_iter'\n",
    "# cot_iter = '/home/XXXX/XXXX/fs_backup_feb13/LLM-project/preds/FewShotCOTFOLIO_Thu_Jan__2_00.41.44_2025_iter'\n",
    "cot_iter = '/home/XXXX/XXXX/fs_backup_feb13/LLM-project/preds/mistral_FewShotCOT_folionew_Mon_May_12_11.51.34_2025_iter'\n",
    "n_cot_iters = 20     # prediction files are read from <cot_iter>0 ... <cot_iter>19\n",
    "cot_pred = []        # per run: {problem file name: prediction matched the label}\n",
    "cot_pred_list = []   # per run: the same flags as a 0/1 list in `labels` order\n",
    "cot_accs = []        # per run: number of matching predictions\n",
    "for i in range(n_cot_iters):\n",
    "    # Close every prediction file after loading; the bare open(...) leaked one handle per run.\n",
    "    # allow_pickle=True can execute code stored in the file: only load trusted files.\n",
    "    with open(cot_iter + str(i), 'rb') as pred_file:\n",
    "        cot = np.load(pred_file, allow_pickle=True)\n",
    "\n",
    "    cot_preds = {}\n",
    "    cot_preds_list = []\n",
    "    # Prediction j is compared with the j-th entry of `labels` -- assumes both share one ordering.\n",
    "    for j, (key, value) in enumerate(labels.items()):\n",
    "        correct = bool(cot[j] == value)\n",
    "        cot_preds[key] = correct\n",
    "        cot_preds_list.append(int(correct))\n",
    "    cot_acc = sum(cot_preds_list)\n",
    "    print(cot_acc/len(cot))\n",
    "    cot_accs.append(cot_acc)\n",
    "    # Both containers are created fresh in every iteration, so no defensive copy is needed.\n",
    "    cot_pred.append(cot_preds)\n",
    "    cot_pred_list.append(cot_preds_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "cot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "cot_pred_list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# n_votes[i]: number of runs whose flag for problem i is 1.\n",
    "n_votes = [sum(flags) for flags in zip(*cot_pred_list)]\n",
    "\n",
    "# A problem gets sc_pred 1 when its count reaches ceil(n_runs/2 + 0.5), i.e. a strict majority.\n",
    "majority = np.ceil(len(cot_pred_list)/2+0.5)\n",
    "sc_pred = {}\n",
    "for key in cot_pred[0]:\n",
    "    hits = sum(run[key] for run in cot_pred)\n",
    "    sc_pred[key] = 1 if hits >= majority else 0\n",
    "sc_acc = sum(sc_pred.values())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Load the dict stored in '<cot_iter>9_[0, 1]'; the handle is closed after loading.\n",
    "# allow_pickle=True can execute code stored in the file: only load trusted files.\n",
    "with open(cot_iter + str(9) + '_[0, 1]', 'rb') as counts_file:\n",
    "    cot = np.load(counts_file, allow_pickle=True).item()\n",
    "idxd = {'true':0, 'false': 1}\n",
    "# One entry per labelled problem: the value stored at the gold label's index, minus 1.\n",
    "# The original wrapped this in a second loop over `cot` that reused `key`/`value`,\n",
    "# so every entry was appended len(cot) times.\n",
    "n_votes = [cot[key][idxd[value]] - 1 for key, value in labels.items()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "len(cot_pred_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Strict-majority threshold over the sampled runs, computed once and reused.\n",
    "majority = np.ceil(len(cot_pred_list)/2 + 0.5)\n",
    "n_majority_correct = np.sum(np.where(np.array(n_votes) >= majority, 1, 0))\n",
    "print(n_majority_correct)\n",
    "\n",
    "print('sc acc:', n_majority_correct/len(cot))\n",
    "print('cot acc:', np.mean(cot_accs)/len(cot))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# `resample` is never imported in this notebook, so this cell failed on a fresh kernel.\n",
    "# The bootstrap draws (sampling with replacement) are made with NumPy instead.\n",
    "bs_sc_rng = np.random.default_rng(0)   # fixed seed -> reproducible bootstrap\n",
    "bs_majority = np.ceil((20)/2+0.5)      # strict majority of 20 runs, as before\n",
    "votes_arr = np.asarray(n_votes)\n",
    "bs_sc = []\n",
    "bs_sc_acc = []\n",
    "for _ in range(len(n_votes)):\n",
    "    sample = bs_sc_rng.choice(votes_arr, size=86, replace=True)\n",
    "    bs_sc.append(sample)\n",
    "    # Fraction of resampled problems whose count reaches the majority threshold.\n",
    "    bs_sc_acc.append(np.sum(sample >= bs_majority)/len(sample))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# outs_str = '/home/XXXX/XXXX/fs_backup_feb13/LLM-project/all_outs_cot_met_folio_rulethresh07_thresh05_varname_llama70B_varname_cotThresh08'\n",
    "# outs_str = '/home/XXXX/XXXX/fs_backup_feb13/LLM-project/all_outs_cot_met_folio_thresh05_rulethresh07_sc20_8B.pkl'\n",
    "outs_str = '/home/XXXX/XXXX//fs_backup_feb13/LLM-project/all_outs_cot_met_folio_L8B_ablate_all'\n",
    "# Close the file after loading (the bare open(...) handle was never closed).\n",
    "# allow_pickle=True can execute code stored in the file: only load trusted files.\n",
    "with open(outs_str, 'rb') as outs_file:\n",
    "    outs = np.load(outs_file, allow_pickle=True).item()\n",
    "\n",
    "outs_pred = {}\n",
    "outs_acc = 0\n",
    "num_trues = 0\n",
    "for key, value in outs.items():\n",
    "    # Counted as correct when the list under value[1]['neg'] is empty for a 'false' label,\n",
    "    # or the list under value[1]['pos'] is empty for a 'true' label.\n",
    "    if len(value[1]['neg']) == 0 and labels[key] == 'false':\n",
    "        outs_pred[key] = True\n",
    "        outs_acc += 1\n",
    "    elif len(value[1]['pos']) == 0 and labels[key] == 'true':\n",
    "        outs_pred[key] = True\n",
    "        outs_acc += 1\n",
    "    else:\n",
    "        outs_pred[key] = False\n",
    "    if labels[key] == 'true':\n",
    "        num_trues += 1\n",
    "outs_acc /= len(outs_pred.keys())\n",
    "outs_pred_val = np.array(list(outs_pred.values()))\n",
    "\n",
    "# `resample` is never imported in this notebook; draw the bootstrap samples with NumPy.\n",
    "bs_outs_rng = np.random.default_rng(1)   # fixed seed -> reproducible bootstrap\n",
    "bs_outs_acc = []\n",
    "for _ in range(len(outs_pred)):\n",
    "    sample = bs_outs_rng.choice(outs_pred_val, size=86, replace=True)\n",
    "    bs_outs_acc.append(np.sum(sample)/86)\n",
    "# outs['clutrr545.cnf'][1]\n",
    "print(outs_acc)\n",
    "print(outs_acc*len(outs_pred.keys()))\n",
    "print(len(outs))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "from scipy.stats import wilcoxon\n",
    "print(wilcoxon(np.array(bs_outs_acc) - np.array(bs_sc_acc), alternative='greater'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "np.mean(np.array(bs_outs_acc) - np.array(bs_sc_acc))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "np.sum(list(outs_pred_val))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "np.std(bs_outs_acc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "from scipy import stats\n",
    "\n",
    "confidence_level=0.95\n",
    "d = bs_outs_acc\n",
    "# NOTE(review): the spread of the bootstrap replicates is divided by sqrt(len(d)), so the interval\n",
    "# narrows as more replicates are drawn; a percentile interval of `d` may be what is wanted -- confirm.\n",
    "n_reps = len(d)\n",
    "std_err = np.std(d, ddof=1) / np.sqrt(n_reps)\n",
    "ci = stats.t.interval(confidence_level, df=n_reps-1, loc=np.mean(d), scale=std_err)\n",
    "print(ci)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "np.mean(bs_outs_acc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "len(cot_pred_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sc_acc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "outs_str = '/home/XXXX/XXXX/fs_backup_feb13/LLM-project/all_outs_cot_met_folio_cotthresh1_05thresh_03rulethresh_70B.pkl'\n",
    "# pickle.load runs code embedded in the file: only load pickles you produced yourself.\n",
    "# The context manager closes the handle that the bare open(...) call leaked.\n",
    "with open(outs_str, 'rb') as outs_file:\n",
    "    outs = pkl.load(outs_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Count the entries of `outs` whose last element has more than two items.\n",
    "# Iterating the values directly avoids rebuilding list(outs.keys()) on every step.\n",
    "z = sum(1 for value in outs.values() if len(value[-1]) > 2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(z)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "len(outs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "outs_str = '/home/XXXX/XXXX/fs_backup_feb13/LLM-project/all_outs_cot_met_folio_cotthresh1_05thresh_03rulethresh_70B.pkl'\n",
    "# pickle.load runs code embedded in the file: only load pickles you produced yourself.\n",
    "# The context manager closes the handle that the bare open(...) call leaked.\n",
    "with open(outs_str, 'rb') as outs_file:\n",
    "    outs = pkl.load(outs_file)\n",
    "\n",
    "outs_pred = {}\n",
    "outs_acc = 0\n",
    "num_trues = 0\n",
    "for key, value in outs.items():\n",
    "    # Counted as correct when the list under value[1]['neg'] is empty for a 'false' label,\n",
    "    # or the list under value[1]['pos'] is empty for a 'true' label.\n",
    "    if len(value[1]['neg']) == 0 and labels[key] == 'false':\n",
    "        outs_pred[key] = True\n",
    "        outs_acc += 1\n",
    "    elif len(value[1]['pos']) == 0 and labels[key] == 'true':\n",
    "        outs_pred[key] = True\n",
    "        outs_acc += 1\n",
    "    else:\n",
    "        outs_pred[key] = False\n",
    "    if labels[key] == 'true':\n",
    "        num_trues += 1\n",
    "outs_acc /= len(outs_pred.keys())\n",
    "# outs['clutrr545.cnf'][1]\n",
    "print(outs_acc)\n",
    "print(outs_acc*len(outs_pred.keys()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# /home/XXXX/XXXX/fs_backup_feb13/all_outs_thresh09_rulethresh04_contexthresh05_dynamicTure.pkl\n",
    "# outs_str is an absolute path in the cells above, so appending it whole produced\n",
    "# '<prefix>/home/...'. Only its file name is appended now (a bare file name is unchanged).\n",
    "# NOTE(review): confirm that 'missed_list_<name>' / 'hunh_<name>' is the intended naming scheme.\n",
    "outs_name = os.path.basename(outs_str)\n",
    "# pickle.load runs code embedded in the file: only load pickles you produced yourself.\n",
    "with open('//home/XXXX/XXXX/fs_backup_feb13//missed_list_' + outs_name, 'rb') as missed_file:\n",
    "    missed = pkl.load(missed_file)\n",
    "with open('/home/XXXX/XXXX/fs_backup_feb13/hunh_' + outs_name, 'rb') as hunh_file:\n",
    "    hunh_list = pkl.load(hunh_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# missed_list = first element of every `missed` entry, followed by all items of hunh_list.\n",
    "missed_list = [miss[0] for miss in missed]\n",
    "missed_list.extend(hunh_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "len(missed_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "miss_acc = 0       # missed problems flagged correct in outs_pred\n",
    "miss_sc_acc = 0    # missed problems flagged correct in sc_pred\n",
    "no_gains = 0       # missed problems on which both flags agree\n",
    "for miss in missed_list:\n",
    "    outs_ok = bool(outs_pred[miss])\n",
    "    sc_ok = bool(sc_pred[miss])\n",
    "    miss_acc += outs_ok\n",
    "    miss_sc_acc += sc_ok\n",
    "    # Both right or both wrong. The original tested `sc_pred[miss] == False` twice in the\n",
    "    # both-wrong case, so it also counted problems that only outs_pred got right.\n",
    "    if sc_ok == outs_ok:\n",
    "        no_gains += 1\n",
    "print(miss_acc/len(missed_list))\n",
    "print(miss_sc_acc/len(missed_list))\n",
    "# print(no_gains/len(missed_list))\n",
    "print(miss_sc_acc - miss_acc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "len(missed_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 2x2 table over missed_list: first letter = sc_pred flag, second letter = outs_pred flag.\n",
    "tt, tf, ft, ff = [], [], [], []\n",
    "buckets = {(True, True): tt, (True, False): tf, (False, True): ft, (False, False): ff}\n",
    "for miss in missed_list:\n",
    "    buckets[(bool(sc_pred[miss]), bool(outs_pred[miss]))].append(miss)\n",
    "\n",
    "print(len(tt), len(tf), len(ft), len(ff))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 2x2 table over every labelled problem: first letter = sc_pred flag, second = outs_pred flag.\n",
    "tt, tf, ft, ff = [], [], [], []\n",
    "buckets = {(True, True): tt, (True, False): tf, (False, True): ft, (False, False): ff}\n",
    "for name in labels.keys():\n",
    "    # if name in missed_list:continue\n",
    "    buckets[(bool(sc_pred[name]), bool(outs_pred[name]))].append(name)\n",
    "\n",
    "print(len(tt), len(tf), len(ft), len(ff))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# (5   <- unfinished scratch expression, commented out: it raised SyntaxError and stopped Run All"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from matplotlib import pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.hist(n_votes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 2x2 table over the labelled problems that are NOT in missed_list:\n",
    "# first letter = cot_pred[0] flag (first run), second letter = outs_pred flag.\n",
    "tt, tf, ft, ff = [], [], [], []\n",
    "buckets = {(True, True): tt, (True, False): tf, (False, True): ft, (False, False): ff}\n",
    "for name in labels.keys():\n",
    "    if name not in missed_list:\n",
    "        buckets[(bool(cot_pred[0][name]), bool(outs_pred[name]))].append(name)\n",
    "\n",
    "print(len(tt), len(tf), len(ft), len(ff))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 2x2 table over missed_list: first letter = cot_pred[0] flag (first run), second = outs_pred flag.\n",
    "tt, tf, ft, ff = [], [], [], []\n",
    "buckets = {(True, True): tt, (True, False): tf, (False, True): ft, (False, False): ff}\n",
    "for name in missed_list:\n",
    "    # if name in missed_list:continue\n",
    "    buckets[(bool(cot_pred[0][name]), bool(outs_pred[name]))].append(name)\n",
    "\n",
    "print(len(tt), len(tf), len(ft), len(ff))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(tf)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(missed_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# pickle.load runs code embedded in the file: only load pickles you produced yourself.\n",
    "# The context manager closes the handle that the bare open(...) call leaked.\n",
    "with open('/home/XXXX/XXXX/fs_backup_feb13/LLM-project/scores_temp1_thresh075_thresh05_dynFalse_fixed.pkl', 'rb') as scores_file:\n",
    "    scores = pkl.load(scores_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(tf)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Print every line stored in outs['clutrr60.cnf'][0]. A counter `i` tracks the position inside\n",
    "# groups of four lines; on the third line of a group the matching `scores` entry is printed too.\n",
    "# NOTE(review): the key 'clutrr60.cnf' looks like a CLUTRR file name -- confirm it exists in `outs`.\n",
    "i = 1\n",
    "for line in outs['clutrr60.cnf'][0]:\n",
    "    print(line)\n",
    "    if i % 4 == 3:\n",
    "        # try:\n",
    "            # print(line.split('known predicate: ')[1].split('. Known predicates are')[0].replace('___', line.split('\\\\box{ ')[1]))\n",
    "        # Lookup key: the text between 'known predicate: ' and '. Known predicates are',\n",
    "        # with '___' replaced by whatever follows the box marker in the same line.\n",
    "        print(scores[line.split('known predicate: ')[1].split('. Known predicates are')[0].replace('___', line.split('\\\\box{ ')[1])])\n",
    "        # except:\n",
    "        #     print(line.split('known predicate: ')[1].split('. Known predicates are')[0].replace('___', line.split('\\\\box{ ')[1]))\n",
    "            # print(line)\n",
    "        #     break\n",
    "    # When the fourth line of a group does not start with 'calls', the counter restarts at 2\n",
    "    # instead of advancing; otherwise it is incremented.\n",
    "    if i%4 == 0 and not str(line).startswith('calls'):\n",
    "        # continue\n",
    "        i = 2\n",
    "        # print('hihi')\n",
    "\n",
    "    else: i += 1\n",
    "# print(outs['clutrr125.cnf'])\n",
    "# print(outs.keys())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(list(scores.keys())[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tt = []\n",
    "tf = []\n",
    "ft = []\n",
    "ff = []\n",
    "for miss in missed_list:\n",
    "    if cot_pred[0][miss] == True and outs_pred[miss] == True:\n",
    "        tt.append(miss)\n",
    "    elif cot_pred[0][miss] == True and outs_pred[miss] == False:\n",
    "        tf.append(miss)\n",
    "    elif cot_pred[0][miss] == False and outs_pred[miss] == True:\n",
    "        ft.append(miss)\n",
    "    elif cot_pred[0][miss] == False and outs_pred[miss] == False:\n",
    "        ff.append(miss)\n",
    "    \n",
    "print(len(tt), len(tf), len(ft), len(ff))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "score_list = []\n",
    "for score in scores.values():\n",
    "    score_list.append(torch.stack(score))\n",
    "score = torch.stack(score_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from matplotlib import pyplot as plt\n",
    "\n",
    "fig1, ax1 = plt.subplots()\n",
    "ax1.scatter(x=score[:,0], y=score[:,1], s=3)\n",
    "ax1.set_xlabel('1 - Does the following rule seem contradictory?')\n",
    "ax1.set_ylabel('Does the following rule seem contextually relevant?')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "outs_acc*60"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sc_acc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for key, value in outs_pred.items():\n",
    "    if key not in missed_list and value == False:\n",
    "        print(key)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for i in range(outs['clutrr366.cnf']):\n",
    "    print(outs['clutrr366.cnf'][i])\n",
    "# print(outs['clutrr366.cnf'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "n = 7\n",
    "for i in range(len(missed[n][1])):\n",
    "    print(missed[n][1][i])\n",
    "    # print('\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "outs_pred = {}\n",
    "outs_acc = 0\n",
    "num_trues = 0\n",
    "for key, value in outs.items():\n",
    "    if len(value[1]['neg']) == 0 and labels[key] == 'false':\n",
    "        outs_pred[key] = True\n",
    "        outs_acc += 1\n",
    "    elif len(value[1]['pos']) == 0 and labels[key] == 'true':\n",
    "        outs_pred[key] = True\n",
    "        outs_acc += 1\n",
    "    else:\n",
    "        outs_pred[key] = False\n",
    "    if labels[key] == 'true':\n",
    "        num_trues += 1\n",
    "outs_acc /= len(outs_pred.keys())\n",
    "# outs['clutrr545.cnf'][1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "outs_acc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "outs_pred"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "labels[key]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "outs.keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "len()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import shutil\n",
    "\n",
    "def get_bb(file, del_sols=None):\n",
    "    bb = {'pos':  [], 'neg': []}\n",
    "    \n",
    "    files = ['/'.join(file.split('/')[:-1]) + '/pos_' + file.split('/')[-1], '/'.join(file.split('/')[:-1]) + '/neg_' + file.split('/')[-1] ]\n",
    "    for i in range(len(files)):\n",
    "        file = files[i]\n",
    "        shutil.copy(file, '/'.join(file.split('/')[:-2]) + '/tempfiles/' + str(file.split('/')[-1]))\n",
    "        if not del_sols==None:\n",
    "            if 'pos' in file:\n",
    "                if 'neg' in file:\n",
    "                    print('l. 416 uh oh')\n",
    "                      \n",
    "                ds = del_sols['pos']\n",
    "            elif 'neg' in file:\n",
    "                ds = del_sols['neg']\n",
    "            for sol in ds:\n",
    "                add_clause('/'.join(file.split('/')[:-2]) + '/tempfiles/' + str(file.split('/')[-1]))\n",
    "                cf = open(f'/'.join(file.split('/')[:-2]) + '/tempfiles/' + str(file.split('/')[-1]), 'a')\n",
    "                write_str = '\\n'\n",
    "                for lit in sol:\n",
    "                    write_str += str(-lit) + ' '\n",
    "                # write_str += '0'\n",
    "                cf.write(write_str)\n",
    "                cf.close()\n",
    "        # print('running cadical')\n",
    "        os.system(\"timeout 5000 /home/XXXX/XXXX/fs_backup_feb13/LLM-project/cadiback/cadiback \" + '/'.join(file.split('/')[:-2]) + '/tempfiles/' + str(file.split('/')[-1]) + '> '  + '/'.join(file.split('/')[:-2]) + '/tempfiles/' + str(file.split('/')[-1])[:-4] + \".bbone\")\n",
    "        #   \n",
    "        bbone= open('/'.join(file.split('/')[:-2]) + '/tempfiles/' + str(file.split('/')[-1])[:-4] + \".bbone\", 'r')\n",
    "        lines = bbone.readlines()\n",
    "        #   \n",
    "        for line in lines:\n",
    "            if line.startswith('b'):\n",
    "                #   \n",
    "                lits = line.split(' ')[1:]\n",
    "                for lit in lits:\n",
    "                    lit = lit.strip()\n",
    "                    if lit == '0':\n",
    "                        continue\n",
    "                    lit = int(lit)\n",
    "                    if 'pos' in file:                                \n",
    "                        if 'neg' in file:\n",
    "                            print('l. 447 uh oh')\n",
    "                              \n",
    "                        bb['pos'].append(lit)\n",
    "                    elif 'neg' in file:\n",
    "                            bb['neg'].append(lit)\n",
    "\n",
    "    return bb\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import shutil\n",
    "\n",
    "def get_bb(file, del_sols=None):\n",
    "    bb = {'pos':  [], 'neg': []}\n",
    "    \n",
    "    files = ['/'.join(file.split('/')[:-1]) + '/pos_' + file.split('/')[-1], '/'.join(file.split('/')[:-1]) + '/neg_' + file.split('/')[-1] ]\n",
    "    for i in range(len(files)):\n",
    "        file = files[i]\n",
    "        shutil.copy(file, '/'.join(file.split('/')[:-2]) + '/tempfiles/' + str(file.split('/')[-1]))\n",
    "        if not del_sols==None:\n",
    "            if 'pos' in file:\n",
    "                if 'neg' in file:\n",
    "                    print('l. 416 uh oh')\n",
    "                      \n",
    "                ds = del_sols['pos']\n",
    "            elif 'neg' in file:\n",
    "                ds = del_sols['neg']\n",
    "            for sol in ds:\n",
    "                add_clause('/'.join(file.split('/')[:-2]) + '/tempfiles/' + str(file.split('/')[-1]))\n",
    "                cf = open(f'/'.join(file.split('/')[:-2]) + '/tempfiles/' + str(file.split('/')[-1]), 'a')\n",
    "                write_str = '\\n'\n",
    "                for lit in sol:\n",
    "                    write_str += str(-lit) + ' '\n",
    "                # write_str += '0'\n",
    "                cf.write(write_str)\n",
    "                cf.close()\n",
    "        # print('running cadical')\n",
    "        os.system(\"timeout 5000 /home/XXXX/XXXX/fs_backup_feb13/LLM-project/cadiback/cadiback \" + '/'.join(file.split('/')[:-2]) + '/tempfiles/' + str(file.split('/')[-1]) + '> '  + '/'.join(file.split('/')[:-2]) + '/tempfiles/' + str(file.split('/')[-1])[:-4] + \".bbone\")\n",
    "        #   \n",
    "        bbone= open('/'.join(file.split('/')[:-2]) + '/tempfiles/' + str(file.split('/')[-1])[:-4] + \".bbone\", 'r')\n",
    "        lines = bbone.readlines()\n",
    "        #   \n",
    "        for line in lines:\n",
    "            if line.startswith('b'):\n",
    "                #   \n",
    "                lits = line.split(' ')[1:]\n",
    "                for lit in lits:\n",
    "                    lit = lit.strip()\n",
    "                    if lit == '0':\n",
    "                        continue\n",
    "                    lit = int(lit)\n",
    "                    if 'pos' in file:                                \n",
    "                        if 'neg' in file:\n",
    "                            print('l. 447 uh oh')\n",
    "                              \n",
    "                        bb['pos'].append(lit)\n",
    "                    elif 'neg' in file:\n",
    "                            bb['neg'].append(lit)\n",
    "\n",
    "    return bb\n",
    "\n",
    "c = '/home/XXXX/XXXX/fs_backup_feb13/LLM-project/dimacs_csvs_debug/solver_finished.csv'\n",
    "import csv\n",
    "import json\n",
    "dataset = '/home/XXXX/XXXX/fs_backup_feb13/SAT-LM/data/unfixed_proofd5_test.json'\n",
    "with open(dataset, 'r') as df:\n",
    "    data = json.loads(df.read())\n",
    "\n",
    "task = 'folio'\n",
    "missed=False\n",
    "c = open(c, 'r')\n",
    "cr = csv.reader(c)\n",
    "names = []\n",
    "all_outs = {}\n",
    "missed_list = []\n",
    "labels = {}\n",
    "for row in cr:\n",
    "    if row[2] == 'SAT' and row[3] == 'SAT':\n",
    "        cnf = open('/home/XXXX/XXXX/fs_backup_feb13/LLM-project/dimacs/neg_'+row[1]).readlines()[0].strip('\\n')\n",
    "        num_clause = int(cnf.split(' ')[-1])\n",
    "       \n",
    "        if task=='folio':\n",
    "            bb = get_bb('/home/XXXX/XXXX/fs_backup_feb13/LLM-project/dimacs/'+row[1])\n",
    "            jb = set(bb['pos']).intersection(set(bb['neg']))\n",
    "            if len(jb) == 0:\n",
    "                continue\n",
    "        # if num_clause > 500:\n",
    "            # continue\n",
    "        names.append(int(row[1].split('proofd5')[1].split('.cnf')[0]))\n",
    "        labels[row[1]] = data[int(row[1].split('proofd5')[1].split('.')[0])]['label']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "len(names)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "bad_data = []\n",
    "mistr_data = []\n",
    "noisy_data=[]\n",
    "c = '/home/XXXX/LLM-project/dimacs_csvs_debug/solver_finished.csv'\n",
    "import csv\n",
    "import json\n",
    "dataset = '/home/XXXX/SAT-LM/data/unfixed_proofd5_test.json'\n",
    "with open(dataset, 'r') as df:\n",
    "    data = json.loads(df.read())\n",
    "# breakpoint()\n",
    "task = 'folio'\n",
    "missed=False\n",
    "c = open(c, 'r')\n",
    "cr = csv.reader(c)\n",
    "names = []\n",
    "all_outs = {}\n",
    "missed_list = []\n",
    "labels = {}\n",
    "for row in cr:\n",
    "        if row[2] == 'SAT' and row[3] == 'SAT':\n",
    "            cnf = open('/home/XXXX/LLM-project/dimacs/neg_'+row[1]).readlines()[0].strip('\\n')\n",
    "            num_clause = int(cnf.split(' ')[-1])\n",
    "            if row[1] in noisy_data or row[1] in mistr_data:\n",
    "                continue\n",
    "            if task=='folio':\n",
    "                bb = get_bb('/home/XXXX/LLM-project/dimacs/'+row[1])\n",
    "                jb = set(bb['pos']).intersection(set(bb['neg']))\n",
    "                if len(jb) == 0:\n",
    "                    continue\n",
    "            # if num_clause > 500:\n",
    "                # continue\n",
    "            if row[1] in bad_data:\n",
    "                continue\n",
    "            names.append(row[1])\n",
    "            labels[row[1]] = data[int(row[1].split('proofd5')[1].split('.')[0])]['label']\n",
    "    #   "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "len(names)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "labels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "labels.keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "folio = json.load(open('/home/XXXX/SAT-LM/data/unfixed_clutrr_test.json', 'r'))\n",
    "folio[48]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "i = 0\n",
    "cot_acc = 0\n",
    "cot_preds = {}\n",
    "for key, value in labels.items():\n",
    "    if cot[i] == value:\n",
    "        cot_acc += 1\n",
    "        cot_preds[key] = True\n",
    "    else:\n",
    "        cot_preds[key] = False\n",
    "    i += 1\n",
    "print(cot_acc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "flipped = 0\n",
    "flipped_names = []\n",
    "tf = []\n",
    "ft = []\n",
    "for name in names:\n",
    "    if cot_preds['proofd5' + str(name) + '.cnf'] != outs_pred['proofd5' + str(name) + '.cnf']:\n",
    "        flipped_names.append('proofd5' + str(name) + '.cnf')\n",
    "        flipped += 1\n",
    "    if cot_preds['proofd5' + str(name) + '.cnf'] == True and outs_pred['proofd5' + str(name) + '.cnf'] == False:\n",
    "        tf.append('proofd5' + str(name) + '.cnf')\n",
    "    if cot_preds['proofd5' + str(name) + '.cnf'] == False and outs_pred['proofd5' + str(name) + '.cnf'] == True:\n",
    "        ft.append('proofd5' + str(name) + '.cnf')\n",
    "\n",
    "print(flipped)\n",
    "print(len(tf))\n",
    "print(len(ft))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "flipped = 0\n",
    "flipped_names = []\n",
    "tf = []\n",
    "ft = []\n",
    "for name in missed_list:\n",
    "    name = name[7:-4]\n",
    "    if cot_preds['clutrr' + str(name) + '.cnf'] != outs_pred['clutrr' + str(name) + '.cnf']:\n",
    "        flipped_names.append('clutrr' + str(name) + '.cnf')\n",
    "        flipped += 1\n",
    "    if cot_preds['clutrr' + str(name) + '.cnf'] == True and outs_pred['proofd5' + str(name) + '.cnf'] == False:\n",
    "        tf.append('proofd5' + str(name) + '.cnf')\n",
    "    if cot_preds['proofd5' + str(name) + '.cnf'] == False and outs_pred['proofd5' + str(name) + '.cnf'] == True:\n",
    "        ft.append('proofd5' + str(name) + '.cnf')\n",
    "\n",
    "print(flipped)\n",
    "print(len(tf))\n",
    "print(len(ft))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "missed"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(tf)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "outs['proofd542.cnf']\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "name"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ours = pkl.load(open('/home/XXXX/LLM-project/all_outs_temp1_dynFalse.pkl', 'rb'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(list(ours.keys())[1])\n",
    "ours[list(ours.keys())[1]]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "list(ours.keys())[5]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cot"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "labels[list(ours.keys())[5]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ours[list(ours.keys())[5]]\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "LLM",
   "language": "python",
   "name": "llm"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.19"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
