{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "29d722ca",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "96149b45",
   "metadata": {},
   "source": [
    "# climate+0.9"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 229,
   "id": "ae9541d8",
   "metadata": {},
   "outputs": [],
   "source": [
    "num=350/2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 230,
   "id": "4ee192c5",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "89\n",
      "84.0 0.48\n",
      "86 0.49142857142857144\n",
      "matched num= 350\n"
     ]
    }
   ],
   "source": [
    "# Score detections_1.jsonl against the labelled dataset.\n",
    "#   t  : samples whose label contains 'true' (hallucination) and whose answer has the no-conflict phrase\n",
    "#   f  : remaining (factual) samples whose answer has the no-conflict phrase\n",
    "#   t1 : references of hallucination samples whose answer lacks the phrase\n",
    "#   f1 : references of factual samples whose answer lacks the phrase\n",
    "#   n  : total number of detection/dataset matches\n",
    "NO_CONFLICT=\"here are no conflicting parts\"\n",
    "t=0\n",
    "f=0\n",
    "t1=[]\n",
    "f1=[]\n",
    "n=0\n",
    "# Parse the dataset once up front instead of re-opening it for every detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/climate+0.9/dataset_0.9.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # The marker occurred twice, so the second occurrence is put back into the text.\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        # Drop the last 16 characters (NOTE(review): presumably a fixed trailer - confirm against the data).\n",
    "        dataset.append((label,ref1[:-16]))\n",
    "with open(\"llama_data/climate+0.9/detections_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        key=ref.strip()\n",
    "        no_conflict=NO_CONFLICT in answer\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for label,ref1 in dataset:\n",
    "            key1=ref1.strip()\n",
    "            # An exact match always counts, even after an earlier match for this detection.\n",
    "            matched=key==key1\n",
    "            if not matched and not flag:\n",
    "                # Fallbacks, tried in order and only until this detection has its first match:\n",
    "                # same text before the first '.', same last comma-separated piece,\n",
    "                # or same tail after the last backslash-u escape sequence.\n",
    "                c1=key.split(\",\")\n",
    "                c2=key1.split(\",\")\n",
    "                uni=key.split('\\\\u')[-1][4:]\n",
    "                matched=(key.split(\".\")[0].strip()==key1.split(\".\")[0].strip()\n",
    "                         or (len(c1)>1 and len(c2)>1 and c1[-1]==c2[-1])\n",
    "                         or key1[-len(uni):]==uni)\n",
    "            if matched:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                if 'true' in label: # hallucination\n",
    "                    if no_conflict:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t1.append(ref1)\n",
    "                else: # factual\n",
    "                    if no_conflict:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f1.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # Zero or several dataset matches: print the reference for manual inspection.\n",
    "            print(ref)\n",
    "            print()\n",
    "\n",
    "print(len(f1))\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 231,
   "id": "c56ba323",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "81.0 0.46285714285714286\n",
      "98 0.56\n",
      "matched num= 350\n"
     ]
    }
   ],
   "source": [
    "# Score detections_2.jsonl against the labelled dataset.\n",
    "#   t  : samples whose label contains 'true' (hallucination) and whose answer has the no-conflict phrase\n",
    "#   f  : remaining (factual) samples whose answer has the no-conflict phrase\n",
    "#   t2 : references of hallucination samples whose answer lacks the phrase\n",
    "#   f2 : references of factual samples whose answer lacks the phrase\n",
    "#   n  : total number of detection/dataset matches\n",
    "NO_CONFLICT=\"here are no conflicting parts\"\n",
    "t=0\n",
    "f=0\n",
    "t2=[]\n",
    "f2=[]\n",
    "n=0\n",
    "# Parse the dataset once up front instead of re-opening it for every detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/climate+0.9/dataset_0.9.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # The marker occurred twice, so the second occurrence is put back into the text.\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        # Drop the last 16 characters (NOTE(review): presumably a fixed trailer - confirm against the data).\n",
    "        dataset.append((label,ref1[:-16]))\n",
    "with open(\"llama_data/climate+0.9/detections_2.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        key=ref.strip()\n",
    "        no_conflict=NO_CONFLICT in answer\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for label,ref1 in dataset:\n",
    "            key1=ref1.strip()\n",
    "            # An exact match always counts, even after an earlier match for this detection.\n",
    "            matched=key==key1\n",
    "            if not matched and not flag:\n",
    "                # Fallbacks, tried in order and only until this detection has its first match:\n",
    "                # same text before the first '.', same last comma-separated piece,\n",
    "                # or same tail after the last backslash-u escape sequence.\n",
    "                c1=key.split(\",\")\n",
    "                c2=key1.split(\",\")\n",
    "                uni=key.split('\\\\u')[-1][4:]\n",
    "                matched=(key.split(\".\")[0].strip()==key1.split(\".\")[0].strip()\n",
    "                         or (len(c1)>1 and len(c2)>1 and c1[-1]==c2[-1])\n",
    "                         or key1[-len(uni):]==uni)\n",
    "            if matched:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                if 'true' in label: # hallucination\n",
    "                    if no_conflict:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t2.append(ref1)\n",
    "                else: # factual\n",
    "                    if no_conflict:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f2.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # Zero or several dataset matches: print the reference for manual inspection.\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 232,
   "id": "dce83d96",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "100.0 0.5714285714285714\n",
      "103 0.5885714285714285\n",
      "matched num= 350\n"
     ]
    }
   ],
   "source": [
    "# Score detections_3.jsonl against the labelled dataset.\n",
    "#   t  : samples whose label contains 'true' (hallucination) and whose answer has the no-conflict phrase\n",
    "#   f  : remaining (factual) samples whose answer has the no-conflict phrase\n",
    "#   t3 : references of hallucination samples whose answer lacks the phrase\n",
    "#   f3 : references of factual samples whose answer lacks the phrase\n",
    "#   n  : total number of detection/dataset matches\n",
    "NO_CONFLICT=\"here are no conflicting parts\"\n",
    "t=0\n",
    "f=0\n",
    "t3=[]\n",
    "f3=[]\n",
    "n=0\n",
    "# Parse the dataset once up front instead of re-opening it for every detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/climate+0.9/dataset_0.9.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # The marker occurred twice, so the second occurrence is put back into the text.\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        # Drop the last 16 characters (NOTE(review): presumably a fixed trailer - confirm against the data).\n",
    "        dataset.append((label,ref1[:-16]))\n",
    "with open(\"llama_data/climate+0.9/detections_3.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        key=ref.strip()\n",
    "        no_conflict=NO_CONFLICT in answer\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for label,ref1 in dataset:\n",
    "            key1=ref1.strip()\n",
    "            # An exact match always counts, even after an earlier match for this detection.\n",
    "            matched=key==key1\n",
    "            if not matched and not flag:\n",
    "                # Fallbacks, tried in order and only until this detection has its first match:\n",
    "                # same text before the first '.', same last comma-separated piece,\n",
    "                # or same tail after the last backslash-u escape sequence.\n",
    "                c1=key.split(\",\")\n",
    "                c2=key1.split(\",\")\n",
    "                uni=key.split('\\\\u')[-1][4:]\n",
    "                matched=(key.split(\".\")[0].strip()==key1.split(\".\")[0].strip()\n",
    "                         or (len(c1)>1 and len(c2)>1 and c1[-1]==c2[-1])\n",
    "                         or key1[-len(uni):]==uni)\n",
    "            if matched:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                if 'true' in label: # hallucination\n",
    "                    if no_conflict:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t3.append(ref1)\n",
    "                else: # factual\n",
    "                    if no_conflict:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f3.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # Zero or several dataset matches: print the reference for manual inspection.\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 233,
   "id": "7589d3cb",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "63.0 0.36\n",
      "103 0.5885714285714285\n",
      "matched num= 350\n"
     ]
    }
   ],
   "source": [
    "# Score detections_4.jsonl against the labelled dataset.\n",
    "#   t  : samples whose label contains 'true' (hallucination) and whose answer has the no-conflict phrase\n",
    "#   f  : remaining (factual) samples whose answer has the no-conflict phrase\n",
    "#   t4 : references of hallucination samples whose answer lacks the phrase\n",
    "#   f4 : references of factual samples whose answer lacks the phrase\n",
    "#   n  : total number of detection/dataset matches\n",
    "NO_CONFLICT=\"here are no conflicting parts\"\n",
    "t=0\n",
    "f=0\n",
    "t4=[]\n",
    "f4=[]\n",
    "n=0\n",
    "# Parse the dataset once up front instead of re-opening it for every detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/climate+0.9/dataset_0.9.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # The marker occurred twice, so the second occurrence is put back into the text.\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        # Drop the last 16 characters (NOTE(review): presumably a fixed trailer - confirm against the data).\n",
    "        dataset.append((label,ref1[:-16]))\n",
    "with open(\"llama_data/climate+0.9/detections_4.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        key=ref.strip()\n",
    "        no_conflict=NO_CONFLICT in answer\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for label,ref1 in dataset:\n",
    "            key1=ref1.strip()\n",
    "            # An exact match always counts, even after an earlier match for this detection.\n",
    "            matched=key==key1\n",
    "            if not matched and not flag:\n",
    "                # Fallbacks, tried in order and only until this detection has its first match:\n",
    "                # same text before the first '.', same last comma-separated piece,\n",
    "                # or same tail after the last backslash-u escape sequence.\n",
    "                c1=key.split(\",\")\n",
    "                c2=key1.split(\",\")\n",
    "                uni=key.split('\\\\u')[-1][4:]\n",
    "                matched=(key.split(\".\")[0].strip()==key1.split(\".\")[0].strip()\n",
    "                         or (len(c1)>1 and len(c2)>1 and c1[-1]==c2[-1])\n",
    "                         or key1[-len(uni):]==uni)\n",
    "            if matched:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                if 'true' in label: # hallucination\n",
    "                    if no_conflict:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t4.append(ref1)\n",
    "                else: # factual\n",
    "                    if no_conflict:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f4.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # Zero or several dataset matches: print the reference for manual inspection.\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 234,
   "id": "5af09162",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "97.0 0.5542857142857143\n",
      "89 0.5085714285714286\n",
      "matched num= 350\n"
     ]
    }
   ],
   "source": [
    "# Score detections_5.jsonl against the labelled dataset.\n",
    "#   t  : samples whose label contains 'true' (hallucination) and whose answer has the no-conflict phrase\n",
    "#   f  : remaining (factual) samples whose answer has the no-conflict phrase\n",
    "#   t5 : references of hallucination samples whose answer lacks the phrase\n",
    "#   f5 : references of factual samples whose answer lacks the phrase\n",
    "#   n  : total number of detection/dataset matches\n",
    "NO_CONFLICT=\"here are no conflicting parts\"\n",
    "t=0\n",
    "f=0\n",
    "t5=[]\n",
    "f5=[]\n",
    "n=0\n",
    "# Parse the dataset once up front instead of re-opening it for every detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/climate+0.9/dataset_0.9.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # The marker occurred twice, so the second occurrence is put back into the text.\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        # Drop the last 16 characters (NOTE(review): presumably a fixed trailer - confirm against the data).\n",
    "        dataset.append((label,ref1[:-16]))\n",
    "with open(\"llama_data/climate+0.9/detections_5.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        key=ref.strip()\n",
    "        no_conflict=NO_CONFLICT in answer\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for label,ref1 in dataset:\n",
    "            key1=ref1.strip()\n",
    "            # An exact match always counts, even after an earlier match for this detection.\n",
    "            matched=key==key1\n",
    "            if not matched and not flag:\n",
    "                # Fallbacks, tried in order and only until this detection has its first match:\n",
    "                # same text before the first '.', same last comma-separated piece,\n",
    "                # or same tail after the last backslash-u escape sequence.\n",
    "                c1=key.split(\",\")\n",
    "                c2=key1.split(\",\")\n",
    "                uni=key.split('\\\\u')[-1][4:]\n",
    "                matched=(key.split(\".\")[0].strip()==key1.split(\".\")[0].strip()\n",
    "                         or (len(c1)>1 and len(c2)>1 and c1[-1]==c2[-1])\n",
    "                         or key1[-len(uni):]==uni)\n",
    "            if matched:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                if 'true' in label: # hallucination\n",
    "                    if no_conflict:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t5.append(ref1)\n",
    "                else: # factual\n",
    "                    if no_conflict:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f5.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # Zero or several dataset matches: print the reference for manual inspection.\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 235,
   "id": "334dd0e4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "71.0 0.4057142857142857\n",
      "98 0.56\n",
      "matched num= 350\n"
     ]
    }
   ],
   "source": [
    "# Score detections_6.jsonl against the labelled dataset.\n",
    "#   t  : samples whose label contains 'true' (hallucination) and whose answer has the no-conflict phrase\n",
    "#   f  : remaining (factual) samples whose answer has the no-conflict phrase\n",
    "#   t6 : references of hallucination samples whose answer lacks the phrase\n",
    "#   f6 : references of factual samples whose answer lacks the phrase\n",
    "#   n  : total number of detection/dataset matches\n",
    "NO_CONFLICT=\"here are no conflicting parts\"\n",
    "t=0\n",
    "f=0\n",
    "t6=[]\n",
    "f6=[]\n",
    "n=0\n",
    "# Parse the dataset once up front instead of re-opening it for every detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/climate+0.9/dataset_0.9.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # The marker occurred twice, so the second occurrence is put back into the text.\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        # Drop the last 16 characters (NOTE(review): presumably a fixed trailer - confirm against the data).\n",
    "        dataset.append((label,ref1[:-16]))\n",
    "with open(\"llama_data/climate+0.9/detections_6.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        key=ref.strip()\n",
    "        no_conflict=NO_CONFLICT in answer\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for label,ref1 in dataset:\n",
    "            key1=ref1.strip()\n",
    "            # An exact match always counts, even after an earlier match for this detection.\n",
    "            matched=key==key1\n",
    "            if not matched and not flag:\n",
    "                # Fallbacks, tried in order and only until this detection has its first match:\n",
    "                # same text before the first '.', same last comma-separated piece,\n",
    "                # or same tail after the last backslash-u escape sequence.\n",
    "                c1=key.split(\",\")\n",
    "                c2=key1.split(\",\")\n",
    "                uni=key.split('\\\\u')[-1][4:]\n",
    "                matched=(key.split(\".\")[0].strip()==key1.split(\".\")[0].strip()\n",
    "                         or (len(c1)>1 and len(c2)>1 and c1[-1]==c2[-1])\n",
    "                         or key1[-len(uni):]==uni)\n",
    "            if matched:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                if 'true' in label: # hallucination\n",
    "                    if no_conflict:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t6.append(ref1)\n",
    "                else: # factual\n",
    "                    if no_conflict:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f6.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # Zero or several dataset matches: print the reference for manual inspection.\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 563,
   "id": "419659e0",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "350\n",
      "2.6927710843373496\n",
      "2.7989130434782608\n",
      "1.0394173718509871\n"
     ]
    }
   ],
   "source": [
    "# For every dataset sample, count how many of the six detection runs flagged its reference,\n",
    "# then compare the mean count of hallucination samples (tnum) with that of factual samples (fnum).\n",
    "# The per-run lists are collected here so the cell does not rely on a later cell having been run.\n",
    "tlist=[t1,t2,t3,t4,t5,t6]\n",
    "flist=[f1,f2,f3,f4,f5,f6]\n",
    "fnum=[]\n",
    "tnum=[]\n",
    "knum=0  # number of dataset lines read\n",
    "with open('llama_data/climate+0.9/dataset_0.9.jsonl') as file1:\n",
    "    for line1 in file1:\n",
    "        # Same reference extraction as in the scoring cells, so strings compare equal.\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        ref1=ref1[:-16]\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        knum+=1\n",
    "        # Loop variables are named 'flagged' so the t/f counters of the scoring cells are not overwritten.\n",
    "        if 'true' in label: # hallucination\n",
    "            num2=sum(1 for flagged in tlist if ref1 in flagged)\n",
    "            tnum.append(num2)\n",
    "        else: # factual\n",
    "            num1=sum(1 for flagged in flist if ref1 in flagged)\n",
    "            fnum.append(num1)\n",
    "print(knum)\n",
    "print(np.mean(fnum))\n",
    "print(np.mean(tnum))\n",
    "print(np.mean(tnum)/np.mean(fnum))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 239,
   "id": "559f403e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 5 5 5 5\n",
      "0.48571428571428577\n",
      "0.4797687861271676\n",
      "1 5 5 5 5\n",
      "0.5114285714285715\n",
      "0.4864864864864865\n",
      "2 5 5 5 5\n",
      "0.5800000000000001\n",
      "0.5739130434782609\n"
     ]
    }
   ],
   "source": [
    "# Score each detection run on its own: accuracy (TN+TP) and F1, with all four rates\n",
    "# normalised by the 2*num samples. A run is printed whenever it ties or beats the best accuracy so far.\n",
    "tlist=[t1,t2,t3,t4,t5,t6]\n",
    "flist=[f1,f2,f3,f4,f5,f6]\n",
    "minV=0\n",
    "for kk in range(len(tlist)):\n",
    "    aa=len(set(tlist[kk]))  # distinct hallucination references flagged by run kk\n",
    "    bb=len(set(flist[kk]))  # distinct factual references flagged by run kk\n",
    "    TN=(num-bb)/(num*2)\n",
    "    TP=aa/(num*2)\n",
    "    FN=(num-aa)/(num*2)\n",
    "    FP=bb/(num*2)\n",
    "    F1=2*TP/(2*TP+FP+FN)\n",
    "    if TN+TP>=minV:\n",
    "        # Only kk is printed: jj, ii, i and j belonged to loops that are no longer active,\n",
    "        # so referencing them raised NameError on a fresh kernel.\n",
    "        print(kk)\n",
    "        print(TN+TP)\n",
    "        print(F1)\n",
    "        minV=TN+TP"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "id": "01103093",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "144\n",
      "0.8228571428571428\n"
     ]
    }
   ],
   "source": [
    "# De-duplicated union of the references collected in t2 and t3.\n",
    "res=set(t2+t3)\n",
    "a=list(res)\n",
    "aa=len(a)\n",
    "print(aa)\n",
    "print(aa/num)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 117,
   "id": "208fd140",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "106\n",
      "0.3942857142857143\n"
     ]
    }
   ],
   "source": [
    "# De-duplicated union of the references collected in f2 and f3.\n",
    "res=set(f2+f3)\n",
    "a=list(res)\n",
    "bb=len(a)\n",
    "print(bb)\n",
    "# Share of num that is NOT in the union.\n",
    "print((num-bb)/num)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 118,
   "id": "568f0a12",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "FN 0.08857142857142856\n",
      "FP 0.3028571428571429\n",
      "TN 0.19714285714285715\n",
      "TP 0.4114285714285714\n"
     ]
    }
   ],
   "source": [
    "# Confusion-matrix cells expressed as fractions of all samples\n",
    "# (assumes num samples in each class, hence 2*num overall).\n",
    "all_num=num*2\n",
    "FN=(num-aa)/all_num\n",
    "FP=bb/all_num\n",
    "TN=(num-bb)/all_num\n",
    "TP=aa/all_num\n",
    "for tag,rate in (('FN',FN),('FP',FP),('TN',TN),('TP',TP)):\n",
    "    print(tag,rate)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 119,
   "id": "6c1aa40d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.6085714285714285\n",
      "0.576\n",
      "0.8228571428571428\n",
      "0.6776470588235295\n"
     ]
    }
   ],
   "source": [
    "# Summary metrics derived from the TP/TN/FP/FN rates computed above.\n",
    "Accuracy=TN+TP\n",
    "Precision=TP/(TP+FP)\n",
    "Recall=TP/(TP+FN)\n",
    "F1=2*TP/(2*TP+FP+FN)\n",
    "# Printed in the order: accuracy, precision, recall, F1.\n",
    "for metric in (Accuracy,Precision,Recall,F1):\n",
    "    print(metric)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2741a141",
   "metadata": {},
   "source": [
    "# climate+0.5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 240,
   "id": "238f84b2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Per-class sample count used as the denominator by the cells below.\n",
    "# NOTE(review): assumes the 328 samples split evenly between the two labels - confirm.\n",
    "total_samples=328\n",
    "num=total_samples/2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 241,
   "id": "dae212ca",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "74.0 0.45121951219512196\n",
      "55 0.3353658536585366\n",
      "matched num= 328\n"
     ]
    }
   ],
   "source": [
    "# Match each detection record to dataset entries and tally the verdicts.\n",
    "#   t, f   : matches labelled 'true' (hallucination) / otherwise (factual) whose\n",
    "#            answer contains the no-conflict phrase.\n",
    "#   t1, f1 : references of the matches in each class whose answer lacks it.\n",
    "#   n      : total number of (detection, dataset) matches.\n",
    "phrase=\"here are no conflicting parts\"\n",
    "t=0\n",
    "f=0\n",
    "t1=[]\n",
    "f1=[]\n",
    "n=0\n",
    "# Parse the dataset once up front; it does not change between detection lines.\n",
    "dataset=[]\n",
    "with open(\"llama_data/climate+0.5/dataset_0.5.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # The marker occurs twice, so the text after the first one is re-joined.\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        # NOTE(review): the 8 trimmed characters are presumably record-format residue - confirm.\n",
    "        dataset.append((label,ref1[:-8]))\n",
    "with open(\"llama_data/climate+0.5/detections_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        key=ref.strip()\n",
    "        sentences=key.split(\".\")\n",
    "        # Text after the last literal backslash-u in the reference, minus the 4 characters that follow it.\n",
    "        uni=key.split('\\\\u')[-1][4:]\n",
    "        for label,ref1 in dataset:\n",
    "            cand=ref1.strip()\n",
    "            # Exact matches always count, even after an earlier match for this detection.\n",
    "            matched=key==cand\n",
    "            if not matched and not flag:\n",
    "                # Fallbacks, tried in order and only until this detection has a match:\n",
    "                # same text before the first period, same second-to-last period-separated\n",
    "                # piece, same text before a double backslash, or the entry ends with uni.\n",
    "                cand_sentences=cand.split(\".\")\n",
    "                matched=(\n",
    "                    sentences[0].strip()==cand_sentences[0].strip()\n",
    "                    or (len(sentences)>2 and len(cand_sentences)>2 and sentences[-2]==cand_sentences[-2])\n",
    "                    or key.split(\"\\\\\\\\\")[0]==cand.split(\"\\\\\\\\\")[0]\n",
    "                    or cand[-len(uni):]==uni\n",
    "                )\n",
    "            if matched:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                if 'true' in label: # hallucination\n",
    "                    if phrase in answer:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t1.append(ref1)\n",
    "                else: # factual\n",
    "                    if phrase in answer:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f1.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # Show the detection reference that matched zero or several dataset entries.\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 242,
   "id": "95414188",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "80.0 0.4878048780487805\n",
      "58 0.35365853658536583\n",
      "matched num= 328\n"
     ]
    }
   ],
   "source": [
    "# Match each detection record to dataset entries and tally the verdicts.\n",
    "#   t, f     : matches labelled 'true' (hallucination) / otherwise (factual) whose\n",
    "#              answer contains the no-conflict phrase.\n",
    "#   t11, f11 : references of the matches in each class whose answer lacks it.\n",
    "#   n        : total number of (detection, dataset) matches.\n",
    "phrase=\"here are no conflicting parts\"\n",
    "t=0\n",
    "f=0\n",
    "t11=[]\n",
    "f11=[]\n",
    "n=0\n",
    "# Parse the dataset once up front; it does not change between detection lines.\n",
    "dataset=[]\n",
    "with open(\"llama_data/climate+0.5/dataset_0.5.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # The marker occurs twice, so the text after the first one is re-joined.\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        # NOTE(review): the 8 trimmed characters are presumably record-format residue - confirm.\n",
    "        dataset.append((label,ref1[:-8]))\n",
    "with open(\"llama_data/climate+0.5/detections_1_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        key=ref.strip()\n",
    "        sentences=key.split(\".\")\n",
    "        # Text after the last literal backslash-u in the reference, minus the 4 characters that follow it.\n",
    "        uni=key.split('\\\\u')[-1][4:]\n",
    "        for label,ref1 in dataset:\n",
    "            cand=ref1.strip()\n",
    "            # Exact matches always count, even after an earlier match for this detection.\n",
    "            matched=key==cand\n",
    "            if not matched and not flag:\n",
    "                # Fallbacks, tried in order and only until this detection has a match:\n",
    "                # same text before the first period, same second-to-last period-separated\n",
    "                # piece, same text before a double backslash, or the entry ends with uni.\n",
    "                cand_sentences=cand.split(\".\")\n",
    "                matched=(\n",
    "                    sentences[0].strip()==cand_sentences[0].strip()\n",
    "                    or (len(sentences)>2 and len(cand_sentences)>2 and sentences[-2]==cand_sentences[-2])\n",
    "                    or key.split(\"\\\\\\\\\")[0]==cand.split(\"\\\\\\\\\")[0]\n",
    "                    or cand[-len(uni):]==uni\n",
    "                )\n",
    "            if matched:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                if 'true' in label: # hallucination\n",
    "                    if phrase in answer:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t11.append(ref1)\n",
    "                else: # factual\n",
    "                    if phrase in answer:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f11.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # Show the detection reference that matched zero or several dataset entries.\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 243,
   "id": "7281943f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "64.0 0.3902439024390244\n",
      "69 0.42073170731707316\n",
      "matched num= 328\n"
     ]
    }
   ],
   "source": [
    "# Match each detection record to dataset entries and tally the verdicts.\n",
    "#   t, f   : matches labelled 'true' (hallucination) / otherwise (factual) whose\n",
    "#            answer contains the no-conflict phrase.\n",
    "#   t2, f2 : references of the matches in each class whose answer lacks it.\n",
    "#   n      : total number of (detection, dataset) matches.\n",
    "phrase=\"here are no conflicting parts\"\n",
    "t=0\n",
    "f=0\n",
    "t2=[]\n",
    "f2=[]\n",
    "n=0\n",
    "# Parse the dataset once up front; it does not change between detection lines.\n",
    "dataset=[]\n",
    "with open(\"llama_data/climate+0.5/dataset_0.5.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # The marker occurs twice, so the text after the first one is re-joined.\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        # NOTE(review): the 8 trimmed characters are presumably record-format residue - confirm.\n",
    "        dataset.append((label,ref1[:-8]))\n",
    "with open(\"llama_data/climate+0.5/detections_2.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        key=ref.strip()\n",
    "        sentences=key.split(\".\")\n",
    "        # Text after the last literal backslash-u in the reference, minus the 4 characters that follow it.\n",
    "        uni=key.split('\\\\u')[-1][4:]\n",
    "        for label,ref1 in dataset:\n",
    "            cand=ref1.strip()\n",
    "            # Exact matches always count, even after an earlier match for this detection.\n",
    "            matched=key==cand\n",
    "            if not matched and not flag:\n",
    "                # Fallbacks, tried in order and only until this detection has a match:\n",
    "                # same text before the first period, same second-to-last period-separated\n",
    "                # piece, same text before a double backslash, or the entry ends with uni.\n",
    "                cand_sentences=cand.split(\".\")\n",
    "                matched=(\n",
    "                    sentences[0].strip()==cand_sentences[0].strip()\n",
    "                    or (len(sentences)>2 and len(cand_sentences)>2 and sentences[-2]==cand_sentences[-2])\n",
    "                    or key.split(\"\\\\\\\\\")[0]==cand.split(\"\\\\\\\\\")[0]\n",
    "                    or cand[-len(uni):]==uni\n",
    "                )\n",
    "            if matched:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                if 'true' in label: # hallucination\n",
    "                    if phrase in answer:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t2.append(ref1)\n",
    "                else: # factual\n",
    "                    if phrase in answer:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f2.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # Show the detection reference that matched zero or several dataset entries.\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 244,
   "id": "0657e128",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "71.0 0.4329268292682927\n",
      "65 0.39634146341463417\n",
      "matched num= 328\n"
     ]
    }
   ],
   "source": [
    "# Match each detection record to dataset entries and tally the verdicts.\n",
    "#   t, f     : matches labelled 'true' (hallucination) / otherwise (factual) whose\n",
    "#              answer contains the no-conflict phrase.\n",
    "#   t22, f22 : references of the matches in each class whose answer lacks it.\n",
    "#   n        : total number of (detection, dataset) matches.\n",
    "phrase=\"here are no conflicting parts\"\n",
    "t=0\n",
    "f=0\n",
    "t22=[]\n",
    "f22=[]\n",
    "n=0\n",
    "# Parse the dataset once up front; it does not change between detection lines.\n",
    "dataset=[]\n",
    "with open(\"llama_data/climate+0.5/dataset_0.5.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # The marker occurs twice, so the text after the first one is re-joined.\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        # NOTE(review): the 8 trimmed characters are presumably record-format residue - confirm.\n",
    "        dataset.append((label,ref1[:-8]))\n",
    "with open(\"llama_data/climate+0.5/detections_2_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        key=ref.strip()\n",
    "        sentences=key.split(\".\")\n",
    "        # Text after the last literal backslash-u in the reference, minus the 4 characters that follow it.\n",
    "        uni=key.split('\\\\u')[-1][4:]\n",
    "        for label,ref1 in dataset:\n",
    "            cand=ref1.strip()\n",
    "            # Exact matches always count, even after an earlier match for this detection.\n",
    "            matched=key==cand\n",
    "            if not matched and not flag:\n",
    "                # Fallbacks, tried in order and only until this detection has a match:\n",
    "                # same text before the first period, same second-to-last period-separated\n",
    "                # piece, same text before a double backslash, or the entry ends with uni.\n",
    "                cand_sentences=cand.split(\".\")\n",
    "                matched=(\n",
    "                    sentences[0].strip()==cand_sentences[0].strip()\n",
    "                    or (len(sentences)>2 and len(cand_sentences)>2 and sentences[-2]==cand_sentences[-2])\n",
    "                    or key.split(\"\\\\\\\\\")[0]==cand.split(\"\\\\\\\\\")[0]\n",
    "                    or cand[-len(uni):]==uni\n",
    "                )\n",
    "            if matched:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                if 'true' in label: # hallucination\n",
    "                    if phrase in answer:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t22.append(ref1)\n",
    "                else: # factual\n",
    "                    if phrase in answer:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f22.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # Show the detection reference that matched zero or several dataset entries.\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 245,
   "id": "f75eccd1",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "69.0 0.42073170731707316\n",
      "65 0.39634146341463417\n",
      "matched num= 328\n"
     ]
    }
   ],
   "source": [
    "# Match each detection record to dataset entries and tally the verdicts.\n",
    "#   t, f   : matches labelled 'true' (hallucination) / otherwise (factual) whose\n",
    "#            answer contains the no-conflict phrase.\n",
    "#   t3, f3 : references of the matches in each class whose answer lacks it.\n",
    "#   n      : total number of (detection, dataset) matches.\n",
    "phrase=\"here are no conflicting parts\"\n",
    "t=0\n",
    "f=0\n",
    "t3=[]\n",
    "f3=[]\n",
    "n=0\n",
    "# Parse the dataset once up front; it does not change between detection lines.\n",
    "dataset=[]\n",
    "with open(\"llama_data/climate+0.5/dataset_0.5.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # The marker occurs twice, so the text after the first one is re-joined.\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        # NOTE(review): the 8 trimmed characters are presumably record-format residue - confirm.\n",
    "        dataset.append((label,ref1[:-8]))\n",
    "with open(\"llama_data/climate+0.5/detections_3.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        key=ref.strip()\n",
    "        sentences=key.split(\".\")\n",
    "        # Text after the last literal backslash-u in the reference, minus the 4 characters that follow it.\n",
    "        uni=key.split('\\\\u')[-1][4:]\n",
    "        for label,ref1 in dataset:\n",
    "            cand=ref1.strip()\n",
    "            # Exact matches always count, even after an earlier match for this detection.\n",
    "            matched=key==cand\n",
    "            if not matched and not flag:\n",
    "                # Fallbacks, tried in order and only until this detection has a match:\n",
    "                # same text before the first period, same second-to-last period-separated\n",
    "                # piece, same text before a double backslash, or the entry ends with uni.\n",
    "                cand_sentences=cand.split(\".\")\n",
    "                matched=(\n",
    "                    sentences[0].strip()==cand_sentences[0].strip()\n",
    "                    or (len(sentences)>2 and len(cand_sentences)>2 and sentences[-2]==cand_sentences[-2])\n",
    "                    or key.split(\"\\\\\\\\\")[0]==cand.split(\"\\\\\\\\\")[0]\n",
    "                    or cand[-len(uni):]==uni\n",
    "                )\n",
    "            if matched:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                if 'true' in label: # hallucination\n",
    "                    if phrase in answer:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t3.append(ref1)\n",
    "                else: # factual\n",
    "                    if phrase in answer:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f3.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # Show the detection reference that matched zero or several dataset entries.\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 246,
   "id": "6f7b1cf4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "75.0 0.4573170731707317\n",
      "47 0.2865853658536585\n",
      "matched num= 328\n"
     ]
    }
   ],
   "source": [
    "# Score detections_3_1.jsonl against the labels in dataset_0.5.jsonl.\n",
    "# t / f count matched rows whose answer contains the no-conflict phrase, split by\n",
    "# whether the row label contains 'true'; the references of the remaining matched\n",
    "# rows are collected in t33 / f33. `num` is defined outside this cell.\n",
    "t=0\n",
    "f=0\n",
    "t33=[]\n",
    "f33=[]\n",
    "n=0\n",
    "\n",
    "# Parse the dataset once up front: it is the same for every detection line.\n",
    "dataset_rows=[]\n",
    "with open(\"llama_data/climate+0.5/dataset_0.5.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        parts=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(parts)==3:\n",
    "            # The marker occurs twice: keep everything after its first occurrence.\n",
    "            ref1=parts[1]+\"Reference: \"+parts[2]\n",
    "        else:\n",
    "            ref1=parts[-1]\n",
    "        # Drop the last 8 characters (presumably a fixed trailing delimiter - TODO confirm).\n",
    "        dataset_rows.append((ref1[:-8],label))\n",
    "\n",
    "with open(\"llama_data/climate+0.5/detections_3_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        ref_s=ref.strip()\n",
    "        ref_sentences=ref_s.split(\".\")\n",
    "        ref_head=ref_s.split(\"\\\\\\\\\")[0]\n",
    "        # Text after the last backslash-u escape in ref, skipping its 4 hex digits.\n",
    "        uni=ref_s.split('\\\\u')[-1][4:]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for ref1,label in dataset_rows:\n",
    "            ref1_s=ref1.strip()\n",
    "            # An exact match counts on every row, even after an earlier match.\n",
    "            matched=ref_s==ref1_s\n",
    "            if not matched and not flag:\n",
    "                # Looser fallbacks, tried in order, only while this detection has no match yet.\n",
    "                ref1_sentences=ref1_s.split(\".\")\n",
    "                matched=(\n",
    "                    ref_sentences[0].strip()==ref1_sentences[0].strip()\n",
    "                    or (len(ref_sentences)>2 and len(ref1_sentences)>2\n",
    "                        and ref_sentences[-2]==ref1_sentences[-2])\n",
    "                    or ref_head==ref1_s.split(\"\\\\\\\\\")[0]\n",
    "                    or ref1_s[-len(uni):]==uni\n",
    "                )\n",
    "            if not matched:\n",
    "                continue\n",
    "            flag=True\n",
    "            n+=1\n",
    "            if 'true' in label: # hallucination\n",
    "                if no_conflict:\n",
    "                    t+=1\n",
    "                else:\n",
    "                    t33.append(ref1)\n",
    "            else: # factual\n",
    "                if no_conflict:\n",
    "                    f+=1\n",
    "                else:\n",
    "                    f33.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # Zero or several dataset rows matched: show the detection reference for inspection.\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 247,
   "id": "d052f556",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "48.0 0.2926829268292683\n",
      "80 0.4878048780487805\n",
      "matched num= 328\n"
     ]
    }
   ],
   "source": [
    "# Score detections_4.jsonl against the labels in dataset_0.5.jsonl.\n",
    "# t / f count matched rows whose answer contains the no-conflict phrase, split by\n",
    "# whether the row label contains 'true'; the references of the remaining matched\n",
    "# rows are collected in t4 / f4. `num` is defined outside this cell.\n",
    "t=0\n",
    "f=0\n",
    "t4=[]\n",
    "f4=[]\n",
    "n=0\n",
    "\n",
    "# Parse the dataset once up front: it is the same for every detection line.\n",
    "dataset_rows=[]\n",
    "with open(\"llama_data/climate+0.5/dataset_0.5.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        parts=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(parts)==3:\n",
    "            # The marker occurs twice: keep everything after its first occurrence.\n",
    "            ref1=parts[1]+\"Reference: \"+parts[2]\n",
    "        else:\n",
    "            ref1=parts[-1]\n",
    "        # Drop the last 8 characters (presumably a fixed trailing delimiter - TODO confirm).\n",
    "        dataset_rows.append((ref1[:-8],label))\n",
    "\n",
    "with open(\"llama_data/climate+0.5/detections_4.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        ref_s=ref.strip()\n",
    "        ref_sentences=ref_s.split(\".\")\n",
    "        ref_head=ref_s.split(\"\\\\\\\\\")[0]\n",
    "        # Text after the last backslash-u escape in ref, skipping its 4 hex digits.\n",
    "        uni=ref_s.split('\\\\u')[-1][4:]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for ref1,label in dataset_rows:\n",
    "            ref1_s=ref1.strip()\n",
    "            # An exact match counts on every row, even after an earlier match.\n",
    "            matched=ref_s==ref1_s\n",
    "            if not matched and not flag:\n",
    "                # Looser fallbacks, tried in order, only while this detection has no match yet.\n",
    "                ref1_sentences=ref1_s.split(\".\")\n",
    "                matched=(\n",
    "                    ref_sentences[0].strip()==ref1_sentences[0].strip()\n",
    "                    or (len(ref_sentences)>2 and len(ref1_sentences)>2\n",
    "                        and ref_sentences[-2]==ref1_sentences[-2])\n",
    "                    or ref_head==ref1_s.split(\"\\\\\\\\\")[0]\n",
    "                    or ref1_s[-len(uni):]==uni\n",
    "                )\n",
    "            if not matched:\n",
    "                continue\n",
    "            flag=True\n",
    "            n+=1\n",
    "            if 'true' in label: # hallucination\n",
    "                if no_conflict:\n",
    "                    t+=1\n",
    "                else:\n",
    "                    t4.append(ref1)\n",
    "            else: # factual\n",
    "                if no_conflict:\n",
    "                    f+=1\n",
    "                else:\n",
    "                    f4.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # Zero or several dataset rows matched: show the detection reference for inspection.\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 248,
   "id": "9271ad53",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "43.0 0.2621951219512195\n",
      "81 0.49390243902439024\n",
      "matched num= 328\n"
     ]
    }
   ],
   "source": [
    "# Score detections_4_1.jsonl against the labels in dataset_0.5.jsonl.\n",
    "# t / f count matched rows whose answer contains the no-conflict phrase, split by\n",
    "# whether the row label contains 'true'; the references of the remaining matched\n",
    "# rows are collected in t44 / f44. `num` is defined outside this cell.\n",
    "t=0\n",
    "f=0\n",
    "t44=[]\n",
    "f44=[]\n",
    "n=0\n",
    "\n",
    "# Parse the dataset once up front: it is the same for every detection line.\n",
    "dataset_rows=[]\n",
    "with open(\"llama_data/climate+0.5/dataset_0.5.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        parts=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(parts)==3:\n",
    "            # The marker occurs twice: keep everything after its first occurrence.\n",
    "            ref1=parts[1]+\"Reference: \"+parts[2]\n",
    "        else:\n",
    "            ref1=parts[-1]\n",
    "        # Drop the last 8 characters (presumably a fixed trailing delimiter - TODO confirm).\n",
    "        dataset_rows.append((ref1[:-8],label))\n",
    "\n",
    "with open(\"llama_data/climate+0.5/detections_4_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        ref_s=ref.strip()\n",
    "        ref_sentences=ref_s.split(\".\")\n",
    "        ref_head=ref_s.split(\"\\\\\\\\\")[0]\n",
    "        # Text after the last backslash-u escape in ref, skipping its 4 hex digits.\n",
    "        uni=ref_s.split('\\\\u')[-1][4:]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for ref1,label in dataset_rows:\n",
    "            ref1_s=ref1.strip()\n",
    "            # An exact match counts on every row, even after an earlier match.\n",
    "            matched=ref_s==ref1_s\n",
    "            if not matched and not flag:\n",
    "                # Looser fallbacks, tried in order, only while this detection has no match yet.\n",
    "                ref1_sentences=ref1_s.split(\".\")\n",
    "                matched=(\n",
    "                    ref_sentences[0].strip()==ref1_sentences[0].strip()\n",
    "                    or (len(ref_sentences)>2 and len(ref1_sentences)>2\n",
    "                        and ref_sentences[-2]==ref1_sentences[-2])\n",
    "                    or ref_head==ref1_s.split(\"\\\\\\\\\")[0]\n",
    "                    or ref1_s[-len(uni):]==uni\n",
    "                )\n",
    "            if not matched:\n",
    "                continue\n",
    "            flag=True\n",
    "            n+=1\n",
    "            if 'true' in label: # hallucination\n",
    "                if no_conflict:\n",
    "                    t+=1\n",
    "                else:\n",
    "                    t44.append(ref1)\n",
    "            else: # factual\n",
    "                if no_conflict:\n",
    "                    f+=1\n",
    "                else:\n",
    "                    f44.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # Zero or several dataset rows matched: show the detection reference for inspection.\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 249,
   "id": "98bb7086",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "81.0 0.49390243902439024\n",
      "58 0.35365853658536583\n",
      "matched num= 328\n"
     ]
    }
   ],
   "source": [
    "# Score detections_5.jsonl against the labels in dataset_0.5.jsonl.\n",
    "# t / f count matched rows whose answer contains the no-conflict phrase, split by\n",
    "# whether the row label contains 'true'; the references of the remaining matched\n",
    "# rows are collected in t5 / f5. `num` is defined outside this cell.\n",
    "t=0\n",
    "f=0\n",
    "t5=[]\n",
    "f5=[]\n",
    "n=0\n",
    "\n",
    "# Parse the dataset once up front: it is the same for every detection line.\n",
    "dataset_rows=[]\n",
    "with open(\"llama_data/climate+0.5/dataset_0.5.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        parts=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(parts)==3:\n",
    "            # The marker occurs twice: keep everything after its first occurrence.\n",
    "            ref1=parts[1]+\"Reference: \"+parts[2]\n",
    "        else:\n",
    "            ref1=parts[-1]\n",
    "        # Drop the last 8 characters (presumably a fixed trailing delimiter - TODO confirm).\n",
    "        dataset_rows.append((ref1[:-8],label))\n",
    "\n",
    "with open(\"llama_data/climate+0.5/detections_5.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        ref_s=ref.strip()\n",
    "        ref_sentences=ref_s.split(\".\")\n",
    "        ref_head=ref_s.split(\"\\\\\\\\\")[0]\n",
    "        # Text after the last backslash-u escape in ref, skipping its 4 hex digits.\n",
    "        uni=ref_s.split('\\\\u')[-1][4:]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for ref1,label in dataset_rows:\n",
    "            ref1_s=ref1.strip()\n",
    "            # An exact match counts on every row, even after an earlier match.\n",
    "            matched=ref_s==ref1_s\n",
    "            if not matched and not flag:\n",
    "                # Looser fallbacks, tried in order, only while this detection has no match yet.\n",
    "                ref1_sentences=ref1_s.split(\".\")\n",
    "                matched=(\n",
    "                    ref_sentences[0].strip()==ref1_sentences[0].strip()\n",
    "                    or (len(ref_sentences)>2 and len(ref1_sentences)>2\n",
    "                        and ref_sentences[-2]==ref1_sentences[-2])\n",
    "                    or ref_head==ref1_s.split(\"\\\\\\\\\")[0]\n",
    "                    or ref1_s[-len(uni):]==uni\n",
    "                )\n",
    "            if not matched:\n",
    "                continue\n",
    "            flag=True\n",
    "            n+=1\n",
    "            if 'true' in label: # hallucination\n",
    "                if no_conflict:\n",
    "                    t+=1\n",
    "                else:\n",
    "                    t5.append(ref1)\n",
    "            else: # factual\n",
    "                if no_conflict:\n",
    "                    f+=1\n",
    "                else:\n",
    "                    f5.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # Zero or several dataset rows matched: show the detection reference for inspection.\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 250,
   "id": "c5f399c5",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "86.0 0.524390243902439\n",
      "58 0.35365853658536583\n",
      "matched num= 328\n"
     ]
    }
   ],
   "source": [
    "# Score detections_5_1.jsonl against the labels in dataset_0.5.jsonl.\n",
    "# t / f count matched rows whose answer contains the no-conflict phrase, split by\n",
    "# whether the row label contains 'true'; the references of the remaining matched\n",
    "# rows are collected in t55 / f55. `num` is defined outside this cell.\n",
    "t=0\n",
    "f=0\n",
    "t55=[]\n",
    "f55=[]\n",
    "n=0\n",
    "\n",
    "# Parse the dataset once up front: it is the same for every detection line.\n",
    "dataset_rows=[]\n",
    "with open(\"llama_data/climate+0.5/dataset_0.5.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        parts=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(parts)==3:\n",
    "            # The marker occurs twice: keep everything after its first occurrence.\n",
    "            ref1=parts[1]+\"Reference: \"+parts[2]\n",
    "        else:\n",
    "            ref1=parts[-1]\n",
    "        # Drop the last 8 characters (presumably a fixed trailing delimiter - TODO confirm).\n",
    "        dataset_rows.append((ref1[:-8],label))\n",
    "\n",
    "with open(\"llama_data/climate+0.5/detections_5_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        ref_s=ref.strip()\n",
    "        ref_sentences=ref_s.split(\".\")\n",
    "        ref_head=ref_s.split(\"\\\\\\\\\")[0]\n",
    "        # Text after the last backslash-u escape in ref, skipping its 4 hex digits.\n",
    "        uni=ref_s.split('\\\\u')[-1][4:]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for ref1,label in dataset_rows:\n",
    "            ref1_s=ref1.strip()\n",
    "            # An exact match counts on every row, even after an earlier match.\n",
    "            matched=ref_s==ref1_s\n",
    "            if not matched and not flag:\n",
    "                # Looser fallbacks, tried in order, only while this detection has no match yet.\n",
    "                ref1_sentences=ref1_s.split(\".\")\n",
    "                matched=(\n",
    "                    ref_sentences[0].strip()==ref1_sentences[0].strip()\n",
    "                    or (len(ref_sentences)>2 and len(ref1_sentences)>2\n",
    "                        and ref_sentences[-2]==ref1_sentences[-2])\n",
    "                    or ref_head==ref1_s.split(\"\\\\\\\\\")[0]\n",
    "                    or ref1_s[-len(uni):]==uni\n",
    "                )\n",
    "            if not matched:\n",
    "                continue\n",
    "            flag=True\n",
    "            n+=1\n",
    "            if 'true' in label: # hallucination\n",
    "                if no_conflict:\n",
    "                    t+=1\n",
    "                else:\n",
    "                    t55.append(ref1)\n",
    "            else: # factual\n",
    "                if no_conflict:\n",
    "                    f+=1\n",
    "                else:\n",
    "                    f55.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # Zero or several dataset rows matched: show the detection reference for inspection.\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 251,
   "id": "bc30d7af",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "76.0 0.4634146341463415\n",
      "68 0.4146341463414634\n",
      "matched num= 328\n"
     ]
    }
   ],
   "source": [
    "# Score detections_6.jsonl against the labels in dataset_0.5.jsonl.\n",
    "# t / f count matched rows whose answer contains the no-conflict phrase, split by\n",
    "# whether the row label contains 'true'; the references of the remaining matched\n",
    "# rows are collected in t6 / f6. `num` is defined outside this cell.\n",
    "t=0\n",
    "f=0\n",
    "t6=[]\n",
    "f6=[]\n",
    "n=0\n",
    "\n",
    "# Parse the dataset once up front: it is the same for every detection line.\n",
    "dataset_rows=[]\n",
    "with open(\"llama_data/climate+0.5/dataset_0.5.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        parts=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(parts)==3:\n",
    "            # The marker occurs twice: keep everything after its first occurrence.\n",
    "            ref1=parts[1]+\"Reference: \"+parts[2]\n",
    "        else:\n",
    "            ref1=parts[-1]\n",
    "        # Drop the last 8 characters (presumably a fixed trailing delimiter - TODO confirm).\n",
    "        dataset_rows.append((ref1[:-8],label))\n",
    "\n",
    "with open(\"llama_data/climate+0.5/detections_6.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        ref_s=ref.strip()\n",
    "        ref_sentences=ref_s.split(\".\")\n",
    "        ref_head=ref_s.split(\"\\\\\\\\\")[0]\n",
    "        # Text after the last backslash-u escape in ref, skipping its 4 hex digits.\n",
    "        uni=ref_s.split('\\\\u')[-1][4:]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for ref1,label in dataset_rows:\n",
    "            ref1_s=ref1.strip()\n",
    "            # An exact match counts on every row, even after an earlier match.\n",
    "            matched=ref_s==ref1_s\n",
    "            if not matched and not flag:\n",
    "                # Looser fallbacks, tried in order, only while this detection has no match yet.\n",
    "                ref1_sentences=ref1_s.split(\".\")\n",
    "                matched=(\n",
    "                    ref_sentences[0].strip()==ref1_sentences[0].strip()\n",
    "                    or (len(ref_sentences)>2 and len(ref1_sentences)>2\n",
    "                        and ref_sentences[-2]==ref1_sentences[-2])\n",
    "                    or ref_head==ref1_s.split(\"\\\\\\\\\")[0]\n",
    "                    or ref1_s[-len(uni):]==uni\n",
    "                )\n",
    "            if not matched:\n",
    "                continue\n",
    "            flag=True\n",
    "            n+=1\n",
    "            if 'true' in label: # hallucination\n",
    "                if no_conflict:\n",
    "                    t+=1\n",
    "                else:\n",
    "                    t6.append(ref1)\n",
    "            else: # factual\n",
    "                if no_conflict:\n",
    "                    f+=1\n",
    "                else:\n",
    "                    f6.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # Zero or several dataset rows matched: show the detection reference for inspection.\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 252,
   "id": "044c467a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "79.0 0.4817073170731707\n",
      "68 0.4146341463414634\n",
      "matched num= 328\n"
     ]
    }
   ],
   "source": [
    "t=0\n",
    "f=0\n",
    "t66=[]\n",
    "f66=[]\n",
    "n=0\n",
    "with open(\"llama_data/climate+0.5/detections_6_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        with open(\"llama_data/climate+0.5/dataset_0.5.jsonl\") as file1:\n",
    "            for line1 in file1:\n",
    "                label=line1.split(\"label: \")[-1]\n",
    "                ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "                if len(ref1)==3:\n",
    "                    ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "                else:\n",
    "                    ref1=ref1[-1]\n",
    "                ref1=ref1[:-8]\n",
    "                if ref.strip()==ref1.strip():\n",
    "                    flag=True\n",
    "                    n+=1\n",
    "                    if 'true' in label: # hallucination\n",
    "                        if \"here are no conflicting parts\" in answer: \n",
    "                            t+=1\n",
    "                        else:    \n",
    "                            t66.append(ref1)\n",
    "                    else: # factual\n",
    "                        if \"here are no conflicting parts\" in answer: \n",
    "                            f+=1\n",
    "                        else:    \n",
    "                            f66.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\".\")[0]\n",
    "                    c2=ref1.strip().split(\".\")[0]\n",
    "                    if c1.strip()==c2.strip():\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t66.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f66.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\".\")\n",
    "                    c2=ref1.strip().split(\".\")\n",
    "                    if len(c1)>2 and len(c2)>2 and c1[-2]==c2[-2]:\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t66.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f66.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\"\\\\\\\\\")[0]\n",
    "                    c2=ref1.strip().split(\"\\\\\\\\\")[0]\n",
    "                    if c1==c2:\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t66.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f66.append(ref1)\n",
    "                if flag==False:\n",
    "                    uni=ref.strip().split('\\\\u')[-1][4:]\n",
    "                    length=len(uni)\n",
    "                    if ref1.strip()[-length:] == uni:\n",
    "                        flag = True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t66.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f66.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 253,
   "id": "233be7af",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "87.0 0.5304878048780488\n",
      "55 0.3353658536585366\n",
      "matched num= 328\n"
     ]
    }
   ],
   "source": [
    "t=0\n",
    "f=0\n",
    "t7=[]\n",
    "f7=[]\n",
    "n=0\n",
    "with open(\"llama_data/climate+0.5/detections_7.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        with open(\"llama_data/climate+0.5/dataset_0.5.jsonl\") as file1:\n",
    "            for line1 in file1:\n",
    "                label=line1.split(\"label: \")[-1]\n",
    "                ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "                if len(ref1)==3:\n",
    "                    ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "                else:\n",
    "                    ref1=ref1[-1]\n",
    "                ref1=ref1[:-8]\n",
    "                if ref.strip()==ref1.strip():\n",
    "                    flag=True\n",
    "                    n+=1\n",
    "                    if 'true' in label: # hallucination\n",
    "                        if \"here are no conflicting parts\" in answer: \n",
    "                            t+=1\n",
    "                        else:    \n",
    "                            t7.append(ref1)\n",
    "                    else: # factual\n",
    "                        if \"here are no conflicting parts\" in answer: \n",
    "                            f+=1\n",
    "                        else:    \n",
    "                            f7.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\".\")[0]\n",
    "                    c2=ref1.strip().split(\".\")[0]\n",
    "                    if c1.strip()==c2.strip():\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t7.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f7.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\".\")\n",
    "                    c2=ref1.strip().split(\".\")\n",
    "                    if len(c1)>2 and len(c2)>2 and c1[-2]==c2[-2]:\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t7.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f7.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\"\\\\\\\\\")[0]\n",
    "                    c2=ref1.strip().split(\"\\\\\\\\\")[0]\n",
    "                    if c1==c2:\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t7.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f7.append(ref1)\n",
    "                if flag==False:\n",
    "                    uni=ref.strip().split('\\\\u')[-1][4:]\n",
    "                    length=len(uni)\n",
    "                    if ref1.strip()[-length:] == uni:\n",
    "                        flag = True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t7.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f7.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 268,
   "id": "c612d59f",
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 1 2 3\n",
      "0.5457317073170732\n",
      "0.6409638554216868\n",
      "0 1 2 4\n",
      "0.5579268292682926\n",
      "0.657210401891253\n",
      "0 1 2 5\n",
      "0.5640243902439024\n",
      "0.657074340527578\n",
      "0 1 2 12\n",
      "0.5640243902439024\n",
      "0.6603325415676959\n",
      "0 1 3 9\n",
      "0.5701219512195121\n",
      "0.6602409638554217\n",
      "0 1 11 12\n",
      "0.573170731707317\n",
      "0.6618357487922706\n",
      "1 6 7 12\n",
      "0.573170731707317\n",
      "0.6666666666666666\n",
      "1 7 9 12\n",
      "0.573170731707317\n",
      "0.6650717703349281\n",
      "1 7 11 12\n",
      "0.573170731707317\n",
      "0.6666666666666666\n",
      "2 4 6 10\n",
      "0.573170731707317\n",
      "0.6666666666666666\n",
      "2 6 7 12\n",
      "0.5762195121951219\n",
      "0.6729411764705883\n"
     ]
    }
   ],
   "source": [
    "tlist=[t1,t2,t3,t4,t5,t6,t7,t11,t22,t33,t44,t55,t66]\n",
    "flist=[f1,f2,f3,f4,f5,f6,f7,f11,f22,f33,f44,f55,f66]\n",
    "#      0   1  2  3  4  5  6  7   8   9   10  11  12\n",
    "minV=0\n",
    "for kk in range(13-3):\n",
    "    for jj in range(kk+1,13-2):\n",
    "        for ii in range(jj+1,13-1):\n",
    "            for i in range(ii+1,13-0):\n",
    "                #for j in range(i+1,13-0):\n",
    "                     #for k in range(j+1,13-2):\n",
    "                        #for m in range(k+1,13-1):\n",
    "                            #for n in range(m+1,13-0):\n",
    "                                #for q in range(n+1,13-0):\n",
    "                                    #for w in range(q+1,13-0):\n",
    "                                        #for z in range(w+1,13-0):\n",
    "                                            #for y in range(z+1,13-0):\n",
    "                                                a=tlist[kk]+tlist[jj]+tlist[ii]+tlist[i]#+tlist[j]+tlist[k]+tlist[m]+tlist[n]+tlist[q]+tlist[w]+tlist[z]+tlist[y]+tlist[p] \n",
    "                                                a=list(set(a))\n",
    "                                                aa=len(a)\n",
    "                                                a=flist[kk]+flist[jj]+flist[ii]+flist[i]#+flist[j]+flist[k]+flist[m]+flist[n]+flist[q]+flist[w]+flist[z]+flist[y]+flist[p] \n",
    "                                                a=list(set(a))\n",
    "                                                bb=len(a)\n",
    "                                                TN=(num-bb)/(num*2)\n",
    "                                                TP=aa/(num*2)\n",
    "                                                FN=(num-aa)/(num*2)\n",
    "                                                FP=bb/(num*2)\n",
    "                                                F1=2*TP/(2*TP+FP+FN)\n",
    "                                                if TN+TP>=minV:\n",
    "                                                    print(kk,jj,ii,i)#,j,k,m,n,q,w,z,y,p)\n",
    "                                                    print(TN+TP)\n",
    "                                                    print(F1)\n",
    "                                                    minV=TN+TP"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 255,
   "id": "13fd38f6",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "149\n",
      "0.9085365853658537\n"
     ]
    }
   ],
   "source": [
    "a=t1+t2+t3+t4+t5+t6+t7+t11+t22+t33+t44+t55+t66\n",
    "res=set(a)\n",
    "a=list(res)\n",
    "print(len(a))\n",
    "aa=len(a)\n",
    "print(len(a)/num)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 256,
   "id": "cb903be8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "129\n",
      "0.21341463414634146\n"
     ]
    }
   ],
   "source": [
    "a=f1+f2+f3+f4+f5+f6+f7+f11+f22+f33+f44+f55+f66\n",
    "res=set(a)\n",
    "a=list(res)\n",
    "print(len(a))\n",
    "bb=len(a)\n",
    "print((num-len(a))/num)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 257,
   "id": "623c022a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "FN 0.04573170731707317\n",
      "FP 0.3932926829268293\n",
      "TN 0.10670731707317073\n",
      "TP 0.45426829268292684\n"
     ]
    }
   ],
   "source": [
    "all_num=num*2\n",
    "# FN:\n",
    "FN=(num-aa)/all_num\n",
    "print('FN',FN)\n",
    "# FP:\n",
    "FP=bb/all_num\n",
    "print('FP',FP)\n",
    "# TN:\n",
    "TN=(num-bb)/all_num\n",
    "print('TN',TN)\n",
    "# TP:\n",
    "TP=aa/all_num\n",
    "print('TP',TP)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 258,
   "id": "ff417411",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.5609756097560976\n",
      "0.5359712230215827\n",
      "0.9085365853658537\n",
      "0.6742081447963801\n"
     ]
    }
   ],
   "source": [
    "Accuracy=TN+TP\n",
    "print(Accuracy)\n",
    "Precision=TP/(TP+FP)\n",
    "print(Precision)\n",
    "Recall=TP/(TP+FN)\n",
    "print(Recall)\n",
    "F1=2*TP/(2*TP+FP+FN)\n",
    "print(F1)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1920ab19",
   "metadata": {},
   "source": [
    "# climate+0.1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 161,
   "id": "c5be4e50",
   "metadata": {},
   "outputs": [],
   "source": [
    "num=174"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 162,
   "id": "59aae0c8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "82 0.47126436781609193\n",
      "56 0.3218390804597701\n",
      "matched num= 348\n"
     ]
    }
   ],
   "source": [
    "t=0\n",
    "f=0\n",
    "t1=[]\n",
    "f1=[]\n",
    "n=0\n",
    "with open(\"llama_data/climate+0.1/detections_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        with open(\"llama_data/climate+0.1/dataset_0.1.jsonl\") as file1:\n",
    "            for line1 in file1:\n",
    "                label=line1.split(\"label: \")[-1]\n",
    "                ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "                if len(ref1)==3:\n",
    "                    ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "                else:\n",
    "                    ref1=ref1[-1]\n",
    "                ref1=ref1[:-8]\n",
    "                if ref.strip()==ref1.strip():\n",
    "                    flag=True\n",
    "                    n+=1\n",
    "                    if 'true' in label: # hallucination\n",
    "                        if \"here are no conflicting parts\" in answer: \n",
    "                            t+=1\n",
    "                        else:    \n",
    "                            t1.append(ref1)\n",
    "                    else: # factual\n",
    "                        if \"here are no conflicting parts\" in answer: \n",
    "                            f+=1\n",
    "                        else:    \n",
    "                            f1.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\".\")[0]\n",
    "                    c2=ref1.strip().split(\".\")[0]\n",
    "                    if c1.strip()==c2.strip():\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t1.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f1.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\".\")\n",
    "                    c2=ref1.strip().split(\".\")\n",
    "                    if len(c1)>2 and len(c2)>2 and c1[-2]==c2[-2]:\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t1.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f1.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\"\\\\\\\\\")[0]\n",
    "                    c2=ref1.strip().split(\"\\\\\\\\\")[0]\n",
    "                    if c1==c2:\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t1.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f1.append(ref1)\n",
    "                if flag==False:\n",
    "                    uni=ref.strip().split('\\\\u')[-1][4:]\n",
    "                    length=len(uni)\n",
    "                    if ref1.strip()[-length:] == uni:\n",
    "                        flag = True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t1.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f1.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            print(ref1)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 163,
   "id": "e9b393a2",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "91 0.5229885057471264\n",
      "56 0.3218390804597701\n",
      "matched num= 348\n"
     ]
    }
   ],
   "source": [
    "t=0\n",
    "f=0\n",
    "t11=[]\n",
    "f11=[]\n",
    "n=0\n",
    "with open(\"llama_data/climate+0.1/detections_1_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        with open(\"llama_data/climate+0.1/dataset_0.1.jsonl\") as file1:\n",
    "            for line1 in file1:\n",
    "                label=line1.split(\"label: \")[-1]\n",
    "                ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "                if len(ref1)==3:\n",
    "                    ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "                else:\n",
    "                    ref1=ref1[-1]\n",
    "                ref1=ref1[:-8]\n",
    "                if ref.strip()==ref1.strip():\n",
    "                    flag=True\n",
    "                    n+=1\n",
    "                    if 'true' in label: # hallucination\n",
    "                        if \"here are no conflicting parts\" in answer: \n",
    "                            t+=1\n",
    "                        else:    \n",
    "                            t11.append(ref1)\n",
    "                    else: # factual\n",
    "                        if \"here are no conflicting parts\" in answer: \n",
    "                            f+=1\n",
    "                        else:    \n",
    "                            f11.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\".\")[0]\n",
    "                    c2=ref1.strip().split(\".\")[0]\n",
    "                    if c1.strip()==c2.strip():\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t11.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f11.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\".\")\n",
    "                    c2=ref1.strip().split(\".\")\n",
    "                    if len(c1)>2 and len(c2)>2 and c1[-2]==c2[-2]:\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t11.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f11.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\"\\\\\\\\\")[0]\n",
    "                    c2=ref1.strip().split(\"\\\\\\\\\")[0]\n",
    "                    if c1==c2:\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t11.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f11.append(ref1)\n",
    "                if flag==False:\n",
    "                    uni=ref.strip().split('\\\\u')[-1][4:]\n",
    "                    length=len(uni)\n",
    "                    if ref1.strip()[-length:] == uni:\n",
    "                        flag = True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t11.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f11.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            print(ref1)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 164,
   "id": "eff1ab53",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "73 0.41954022988505746\n",
      "67 0.3850574712643678\n",
      "matched num= 348\n"
     ]
    }
   ],
   "source": [
    "# Score detections_2.jsonl against the labels in dataset_0.1.jsonl.\n",
    "# t / f count hallucination- / factual-labelled references whose answer contains the\n",
    "# 'no conflicting parts' phrase; t2 / f2 collect the references whose answer does not.\n",
    "# `num` is not assigned in this cell and must already exist in the kernel.\n",
    "t=0\n",
    "f=0\n",
    "t2=[]\n",
    "f2=[]\n",
    "n=0\n",
    "# Parse the dataset once instead of re-reading the file for every detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/climate+0.1/dataset_0.1.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        parts=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(parts)==3:\n",
    "            # the reference text itself contains 'Reference: '; re-join its two halves\n",
    "            ref1=parts[1]+\"Reference: \"+parts[2]\n",
    "        else:\n",
    "            ref1=parts[-1]\n",
    "        ref1=ref1[:-8] # drop the last 8 characters (presumably trailing markup; TODO confirm)\n",
    "        dataset.append((ref1,ref1.strip(),'true' in label))\n",
    "with open(\"llama_data/climate+0.1/detections_2.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        key=ref.strip()\n",
    "        sentences=key.split(\".\")\n",
    "        head=key.split(\"\\\\\\\\\")[0]\n",
    "        # text after the last backslash-u marker, skipping 4 characters (presumably its hex digits);\n",
    "        # when the marker is absent this is simply key[4:]\n",
    "        uni=key.split('\\\\u')[-1][4:]\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for ref1,key1,hallucination in dataset:\n",
    "            matched=key==key1\n",
    "            if not matched and not flag:\n",
    "                # Fuzzy fallbacks, tried only until this detection gets its first match.\n",
    "                sentences1=key1.split(\".\")\n",
    "                matched=(\n",
    "                    sentences[0].strip()==sentences1[0].strip() # same text before the first '.'\n",
    "                    or (len(sentences)>2 and len(sentences1)>2 and sentences[-2]==sentences1[-2]) # same second-to-last '.'-segment\n",
    "                    or head==key1.split(\"\\\\\\\\\")[0] # same text before the first double backslash\n",
    "                    or key1[-len(uni):]==uni # same tail after the last backslash-u marker\n",
    "                )\n",
    "            if not matched:\n",
    "                continue\n",
    "            flag=True\n",
    "            n+=1\n",
    "            if hallucination:\n",
    "                if no_conflict:\n",
    "                    t+=1\n",
    "                else:\n",
    "                    t2.append(ref1)\n",
    "            else: # factual\n",
    "                if no_conflict:\n",
    "                    f+=1\n",
    "                else:\n",
    "                    f2.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # zero or several dataset rows matched: show the detection reference for inspection\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 165,
   "id": "134b8dac",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "77 0.4425287356321839\n",
      "59 0.3390804597701149\n",
      "matched num= 348\n"
     ]
    }
   ],
   "source": [
    "# Score detections_2_1.jsonl against the labels in dataset_0.1.jsonl.\n",
    "# t / f count hallucination- / factual-labelled references whose answer contains the\n",
    "# 'no conflicting parts' phrase; t22 / f22 collect the references whose answer does not.\n",
    "# `num` is not assigned in this cell and must already exist in the kernel.\n",
    "t=0\n",
    "f=0\n",
    "t22=[]\n",
    "f22=[]\n",
    "n=0\n",
    "# Parse the dataset once instead of re-reading the file for every detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/climate+0.1/dataset_0.1.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        parts=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(parts)==3:\n",
    "            # the reference text itself contains 'Reference: '; re-join its two halves\n",
    "            ref1=parts[1]+\"Reference: \"+parts[2]\n",
    "        else:\n",
    "            ref1=parts[-1]\n",
    "        ref1=ref1[:-8] # drop the last 8 characters (presumably trailing markup; TODO confirm)\n",
    "        dataset.append((ref1,ref1.strip(),'true' in label))\n",
    "with open(\"llama_data/climate+0.1/detections_2_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        key=ref.strip()\n",
    "        sentences=key.split(\".\")\n",
    "        head=key.split(\"\\\\\\\\\")[0]\n",
    "        # text after the last backslash-u marker, skipping 4 characters (presumably its hex digits);\n",
    "        # when the marker is absent this is simply key[4:]\n",
    "        uni=key.split('\\\\u')[-1][4:]\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for ref1,key1,hallucination in dataset:\n",
    "            matched=key==key1\n",
    "            if not matched and not flag:\n",
    "                # Fuzzy fallbacks, tried only until this detection gets its first match.\n",
    "                sentences1=key1.split(\".\")\n",
    "                matched=(\n",
    "                    sentences[0].strip()==sentences1[0].strip() # same text before the first '.'\n",
    "                    or (len(sentences)>2 and len(sentences1)>2 and sentences[-2]==sentences1[-2]) # same second-to-last '.'-segment\n",
    "                    or head==key1.split(\"\\\\\\\\\")[0] # same text before the first double backslash\n",
    "                    or key1[-len(uni):]==uni # same tail after the last backslash-u marker\n",
    "                )\n",
    "            if not matched:\n",
    "                continue\n",
    "            flag=True\n",
    "            n+=1\n",
    "            if hallucination:\n",
    "                if no_conflict:\n",
    "                    t+=1\n",
    "                else:\n",
    "                    t22.append(ref1)\n",
    "            else: # factual\n",
    "                if no_conflict:\n",
    "                    f+=1\n",
    "                else:\n",
    "                    f22.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # zero or several dataset rows matched: show the detection reference for inspection\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 166,
   "id": "1cf2f3c0",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "75 0.43103448275862066\n",
      "59 0.3390804597701149\n",
      "matched num= 348\n"
     ]
    }
   ],
   "source": [
    "# Score detections_3.jsonl against the labels in dataset_0.1.jsonl.\n",
    "# t / f count hallucination- / factual-labelled references whose answer contains the\n",
    "# 'no conflicting parts' phrase; t3 / f3 collect the references whose answer does not.\n",
    "# `num` is not assigned in this cell and must already exist in the kernel.\n",
    "t=0\n",
    "f=0\n",
    "t3=[]\n",
    "f3=[]\n",
    "n=0\n",
    "# Parse the dataset once instead of re-reading the file for every detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/climate+0.1/dataset_0.1.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        parts=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(parts)==3:\n",
    "            # the reference text itself contains 'Reference: '; re-join its two halves\n",
    "            ref1=parts[1]+\"Reference: \"+parts[2]\n",
    "        else:\n",
    "            ref1=parts[-1]\n",
    "        ref1=ref1[:-8] # drop the last 8 characters (presumably trailing markup; TODO confirm)\n",
    "        dataset.append((ref1,ref1.strip(),'true' in label))\n",
    "with open(\"llama_data/climate+0.1/detections_3.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        key=ref.strip()\n",
    "        sentences=key.split(\".\")\n",
    "        head=key.split(\"\\\\\\\\\")[0]\n",
    "        # text after the last backslash-u marker, skipping 4 characters (presumably its hex digits);\n",
    "        # when the marker is absent this is simply key[4:]\n",
    "        uni=key.split('\\\\u')[-1][4:]\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for ref1,key1,hallucination in dataset:\n",
    "            matched=key==key1\n",
    "            if not matched and not flag:\n",
    "                # Fuzzy fallbacks, tried only until this detection gets its first match.\n",
    "                sentences1=key1.split(\".\")\n",
    "                matched=(\n",
    "                    sentences[0].strip()==sentences1[0].strip() # same text before the first '.'\n",
    "                    or (len(sentences)>2 and len(sentences1)>2 and sentences[-2]==sentences1[-2]) # same second-to-last '.'-segment\n",
    "                    or head==key1.split(\"\\\\\\\\\")[0] # same text before the first double backslash\n",
    "                    or key1[-len(uni):]==uni # same tail after the last backslash-u marker\n",
    "                )\n",
    "            if not matched:\n",
    "                continue\n",
    "            flag=True\n",
    "            n+=1\n",
    "            if hallucination:\n",
    "                if no_conflict:\n",
    "                    t+=1\n",
    "                else:\n",
    "                    t3.append(ref1)\n",
    "            else: # factual\n",
    "                if no_conflict:\n",
    "                    f+=1\n",
    "                else:\n",
    "                    f3.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # zero or several dataset rows matched: show the detection reference itself\n",
    "            # (the loop variable ref1 would only be the last dataset row, not the culprit)\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 167,
   "id": "89f85b5f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "79 0.4540229885057471\n",
      "57 0.3275862068965517\n",
      "matched num= 348\n"
     ]
    }
   ],
   "source": [
    "# Score detections_3_1.jsonl against the labels in dataset_0.1.jsonl.\n",
    "# t / f count hallucination- / factual-labelled references whose answer contains the\n",
    "# 'no conflicting parts' phrase; t33 / f33 collect the references whose answer does not.\n",
    "# `num` is not assigned in this cell and must already exist in the kernel.\n",
    "t=0\n",
    "f=0\n",
    "t33=[]\n",
    "f33=[]\n",
    "n=0\n",
    "# Parse the dataset once instead of re-reading the file for every detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/climate+0.1/dataset_0.1.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        parts=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(parts)==3:\n",
    "            # the reference text itself contains 'Reference: '; re-join its two halves\n",
    "            ref1=parts[1]+\"Reference: \"+parts[2]\n",
    "        else:\n",
    "            ref1=parts[-1]\n",
    "        ref1=ref1[:-8] # drop the last 8 characters (presumably trailing markup; TODO confirm)\n",
    "        dataset.append((ref1,ref1.strip(),'true' in label))\n",
    "with open(\"llama_data/climate+0.1/detections_3_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        key=ref.strip()\n",
    "        sentences=key.split(\".\")\n",
    "        head=key.split(\"\\\\\\\\\")[0]\n",
    "        # text after the last backslash-u marker, skipping 4 characters (presumably its hex digits);\n",
    "        # when the marker is absent this is simply key[4:]\n",
    "        uni=key.split('\\\\u')[-1][4:]\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for ref1,key1,hallucination in dataset:\n",
    "            matched=key==key1\n",
    "            if not matched and not flag:\n",
    "                # Fuzzy fallbacks, tried only until this detection gets its first match.\n",
    "                sentences1=key1.split(\".\")\n",
    "                matched=(\n",
    "                    sentences[0].strip()==sentences1[0].strip() # same text before the first '.'\n",
    "                    or (len(sentences)>2 and len(sentences1)>2 and sentences[-2]==sentences1[-2]) # same second-to-last '.'-segment\n",
    "                    or head==key1.split(\"\\\\\\\\\")[0] # same text before the first double backslash\n",
    "                    or key1[-len(uni):]==uni # same tail after the last backslash-u marker\n",
    "                )\n",
    "            if not matched:\n",
    "                continue\n",
    "            flag=True\n",
    "            n+=1\n",
    "            if hallucination:\n",
    "                if no_conflict:\n",
    "                    t+=1\n",
    "                else:\n",
    "                    t33.append(ref1)\n",
    "            else: # factual\n",
    "                if no_conflict:\n",
    "                    f+=1\n",
    "                else:\n",
    "                    f33.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # zero or several dataset rows matched: show the detection reference itself\n",
    "            # (the loop variable ref1 would only be the last dataset row, not the culprit)\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 168,
   "id": "15715aa7",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "44 0.25287356321839083\n",
      "76 0.4367816091954023\n",
      "matched num= 348\n"
     ]
    }
   ],
   "source": [
    "# Score detections_4.jsonl against the labels in dataset_0.1.jsonl.\n",
    "# t / f count hallucination- / factual-labelled references whose answer contains the\n",
    "# 'no conflicting parts' phrase; t4 / f4 collect the references whose answer does not.\n",
    "# `num` is not assigned in this cell and must already exist in the kernel.\n",
    "t=0\n",
    "f=0\n",
    "t4=[]\n",
    "f4=[]\n",
    "n=0\n",
    "# Parse the dataset once instead of re-reading the file for every detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/climate+0.1/dataset_0.1.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        parts=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(parts)==3:\n",
    "            # the reference text itself contains 'Reference: '; re-join its two halves\n",
    "            ref1=parts[1]+\"Reference: \"+parts[2]\n",
    "        else:\n",
    "            ref1=parts[-1]\n",
    "        ref1=ref1[:-8] # drop the last 8 characters (presumably trailing markup; TODO confirm)\n",
    "        dataset.append((ref1,ref1.strip(),'true' in label))\n",
    "with open(\"llama_data/climate+0.1/detections_4.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        key=ref.strip()\n",
    "        sentences=key.split(\".\")\n",
    "        head=key.split(\"\\\\\\\\\")[0]\n",
    "        # text after the last backslash-u marker, skipping 4 characters (presumably its hex digits);\n",
    "        # when the marker is absent this is simply key[4:]\n",
    "        uni=key.split('\\\\u')[-1][4:]\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for ref1,key1,hallucination in dataset:\n",
    "            matched=key==key1\n",
    "            if not matched and not flag:\n",
    "                # Fuzzy fallbacks, tried only until this detection gets its first match.\n",
    "                sentences1=key1.split(\".\")\n",
    "                matched=(\n",
    "                    sentences[0].strip()==sentences1[0].strip() # same text before the first '.'\n",
    "                    or (len(sentences)>2 and len(sentences1)>2 and sentences[-2]==sentences1[-2]) # same second-to-last '.'-segment\n",
    "                    or head==key1.split(\"\\\\\\\\\")[0] # same text before the first double backslash\n",
    "                    or key1[-len(uni):]==uni # same tail after the last backslash-u marker\n",
    "                )\n",
    "            if not matched:\n",
    "                continue\n",
    "            flag=True\n",
    "            n+=1\n",
    "            if hallucination:\n",
    "                if no_conflict:\n",
    "                    t+=1\n",
    "                else:\n",
    "                    t4.append(ref1)\n",
    "            else: # factual\n",
    "                if no_conflict:\n",
    "                    f+=1\n",
    "                else:\n",
    "                    f4.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # zero or several dataset rows matched: show the detection reference itself\n",
    "            # (the loop variable ref1 would only be the last dataset row, not the culprit)\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 169,
   "id": "d008a394",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "41 0.23563218390804597\n",
      "84 0.4827586206896552\n",
      "matched num= 348\n"
     ]
    }
   ],
   "source": [
    "# Score detections_4_1.jsonl against the labels in dataset_0.1.jsonl.\n",
    "# t / f count hallucination- / factual-labelled references whose answer contains the\n",
    "# 'no conflicting parts' phrase; t44 / f44 collect the references whose answer does not.\n",
    "# `num` is not assigned in this cell and must already exist in the kernel.\n",
    "t=0\n",
    "f=0\n",
    "t44=[]\n",
    "f44=[]\n",
    "n=0\n",
    "# Parse the dataset once instead of re-reading the file for every detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/climate+0.1/dataset_0.1.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        parts=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(parts)==3:\n",
    "            # the reference text itself contains 'Reference: '; re-join its two halves\n",
    "            ref1=parts[1]+\"Reference: \"+parts[2]\n",
    "        else:\n",
    "            ref1=parts[-1]\n",
    "        ref1=ref1[:-8] # drop the last 8 characters (presumably trailing markup; TODO confirm)\n",
    "        dataset.append((ref1,ref1.strip(),'true' in label))\n",
    "with open(\"llama_data/climate+0.1/detections_4_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        key=ref.strip()\n",
    "        sentences=key.split(\".\")\n",
    "        head=key.split(\"\\\\\\\\\")[0]\n",
    "        # text after the last backslash-u marker, skipping 4 characters (presumably its hex digits);\n",
    "        # when the marker is absent this is simply key[4:]\n",
    "        uni=key.split('\\\\u')[-1][4:]\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for ref1,key1,hallucination in dataset:\n",
    "            matched=key==key1\n",
    "            if not matched and not flag:\n",
    "                # Fuzzy fallbacks, tried only until this detection gets its first match.\n",
    "                sentences1=key1.split(\".\")\n",
    "                matched=(\n",
    "                    sentences[0].strip()==sentences1[0].strip() # same text before the first '.'\n",
    "                    or (len(sentences)>2 and len(sentences1)>2 and sentences[-2]==sentences1[-2]) # same second-to-last '.'-segment\n",
    "                    or head==key1.split(\"\\\\\\\\\")[0] # same text before the first double backslash\n",
    "                    or key1[-len(uni):]==uni # same tail after the last backslash-u marker\n",
    "                )\n",
    "            if not matched:\n",
    "                continue\n",
    "            flag=True\n",
    "            n+=1\n",
    "            if hallucination:\n",
    "                if no_conflict:\n",
    "                    t+=1\n",
    "                else:\n",
    "                    t44.append(ref1)\n",
    "            else: # factual\n",
    "                if no_conflict:\n",
    "                    f+=1\n",
    "                else:\n",
    "                    f44.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # zero or several dataset rows matched: show the detection reference itself\n",
    "            # (the loop variable ref1 would only be the last dataset row, not the culprit)\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 170,
   "id": "117db8bf",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "104 0.5977011494252874\n",
      "44 0.25287356321839083\n",
      "matched num= 348\n"
     ]
    }
   ],
   "source": [
    "t=0\n",
    "f=0\n",
    "t5=[]\n",
    "f5=[]\n",
    "n=0\n",
    "with open(\"llama_data/climate+0.1/detections_5.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        with open(\"llama_data/climate+0.1/dataset_0.1.jsonl\") as file1:\n",
    "            for line1 in file1:\n",
    "                label=line1.split(\"label: \")[-1]\n",
    "                ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "                if len(ref1)==3:\n",
    "                    ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "                else:\n",
    "                    ref1=ref1[-1]\n",
    "                ref1=ref1[:-8]\n",
    "                if ref.strip()==ref1.strip():\n",
    "                    flag=True\n",
    "                    n+=1\n",
    "                    if 'true' in label: # hallucination\n",
    "                        if \"here are no conflicting parts\" in answer: \n",
    "                            t+=1\n",
    "                        else:    \n",
    "                            t5.append(ref1)\n",
    "                    else: # factual\n",
    "                        if \"here are no conflicting parts\" in answer: \n",
    "                            f+=1\n",
    "                        else:    \n",
    "                            f5.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\".\")[0]\n",
    "                    c2=ref1.strip().split(\".\")[0]\n",
    "                    if c1.strip()==c2.strip():\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t5.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f5.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\".\")\n",
    "                    c2=ref1.strip().split(\".\")\n",
    "                    if len(c1)>2 and len(c2)>2 and c1[-2]==c2[-2]:\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t5.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f5.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\"\\\\\\\\\")[0]\n",
    "                    c2=ref1.strip().split(\"\\\\\\\\\")[0]\n",
    "                    if c1==c2:\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t5.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f5.append(ref1)\n",
    "                if flag==False:\n",
    "                    uni=ref.strip().split('\\\\u')[-1][4:]\n",
    "                    length=len(uni)\n",
    "                    if ref1.strip()[-length:] == uni:\n",
    "                        flag = True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t5.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f5.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            print(ref1)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 171,
   "id": "6fe0cdca",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "118 0.6781609195402298\n",
      "84 0.4827586206896552\n",
      "matched num= 348\n"
     ]
    }
   ],
   "source": [
    "t=0\n",
    "f=0\n",
    "t55=[]\n",
    "f55=[]\n",
    "n=0\n",
    "with open(\"llama_data/climate+0.1/detections_5_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        with open(\"llama_data/climate+0.1/dataset_0.1.jsonl\") as file1:\n",
    "            for line1 in file1:\n",
    "                label=line1.split(\"label: \")[-1]\n",
    "                ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "                if len(ref1)==3:\n",
    "                    ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "                else:\n",
    "                    ref1=ref1[-1]\n",
    "                ref1=ref1[:-8]\n",
    "                if ref.strip()==ref1.strip():\n",
    "                    flag=True\n",
    "                    n+=1\n",
    "                    if 'true' in label: # hallucination\n",
    "                        if \"here are no conflicting parts\" in answer: \n",
    "                            t+=1\n",
    "                        else:    \n",
    "                            t55.append(ref1)\n",
    "                    else: # factual\n",
    "                        if \"here are no conflicting parts\" in answer: \n",
    "                            f+=1\n",
    "                        else:    \n",
    "                            f55.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\".\")[0]\n",
    "                    c2=ref1.strip().split(\".\")[0]\n",
    "                    if c1.strip()==c2.strip():\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t55.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f55.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\".\")\n",
    "                    c2=ref1.strip().split(\".\")\n",
    "                    if len(c1)>2 and len(c2)>2 and c1[-2]==c2[-2]:\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t55.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f55.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\"\\\\\\\\\")[0]\n",
    "                    c2=ref1.strip().split(\"\\\\\\\\\")[0]\n",
    "                    if c1==c2:\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t55.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f55.append(ref1)\n",
    "                if flag==False:\n",
    "                    uni=ref.strip().split('\\\\u')[-1][4:]\n",
    "                    length=len(uni)\n",
    "                    if ref1.strip()[-length:] == uni:\n",
    "                        flag = True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t55.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f55.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            print(ref1)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 172,
   "id": "ec92d774",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "58 0.3333333333333333\n",
      "79 0.4540229885057471\n",
      "matched num= 348\n"
     ]
    }
   ],
   "source": [
    "t=0\n",
    "f=0\n",
    "t6=[]\n",
    "f6=[]\n",
    "n=0\n",
    "with open(\"llama_data/climate+0.1/detections_6.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        with open(\"llama_data/climate+0.1/dataset_0.1.jsonl\") as file1:\n",
    "            for line1 in file1:\n",
    "                label=line1.split(\"label: \")[-1]\n",
    "                ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "                if len(ref1)==3:\n",
    "                    ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "                else:\n",
    "                    ref1=ref1[-1]\n",
    "                ref1=ref1[:-8]\n",
    "                if ref.strip()==ref1.strip():\n",
    "                    flag=True\n",
    "                    n+=1\n",
    "                    if 'true' in label: # hallucination\n",
    "                        if \"here are no conflicting parts\" in answer: \n",
    "                            t+=1\n",
    "                        else:    \n",
    "                            t6.append(ref1)\n",
    "                    else: # factual\n",
    "                        if \"here are no conflicting parts\" in answer: \n",
    "                            f+=1\n",
    "                        else:    \n",
    "                            f6.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\".\")[0]\n",
    "                    c2=ref1.strip().split(\".\")[0]\n",
    "                    if c1.strip()==c2.strip():\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t6.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f6.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\".\")\n",
    "                    c2=ref1.strip().split(\".\")\n",
    "                    if len(c1)>2 and len(c2)>2 and c1[-2]==c2[-2]:\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t6.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f6.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\"\\\\\\\\\")[0]\n",
    "                    c2=ref1.strip().split(\"\\\\\\\\\")[0]\n",
    "                    if c1==c2:\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t6.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f6.append(ref1)\n",
    "                if flag==False:\n",
    "                    uni=ref.strip().split('\\\\u')[-1][4:]\n",
    "                    length=len(uni)\n",
    "                    if ref1.strip()[-length:] == uni:\n",
    "                        flag = True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t6.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f6.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            print(ref1)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 173,
   "id": "84701fb1",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "66 0.3793103448275862\n",
      "76 0.4367816091954023\n",
      "matched num= 348\n"
     ]
    }
   ],
   "source": [
    "t=0\n",
    "f=0\n",
    "t66=[]\n",
    "f66=[]\n",
    "n=0\n",
    "with open(\"llama_data/climate+0.1/detections_6_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        with open(\"llama_data/climate+0.1/dataset_0.1.jsonl\") as file1:\n",
    "            for line1 in file1:\n",
    "                label=line1.split(\"label: \")[-1]\n",
    "                ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "                if len(ref1)==3:\n",
    "                    ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "                else:\n",
    "                    ref1=ref1[-1]\n",
    "                ref1=ref1[:-8]\n",
    "                if ref.strip()==ref1.strip():\n",
    "                    flag=True\n",
    "                    n+=1\n",
    "                    if 'true' in label: # hallucination\n",
    "                        if \"here are no conflicting parts\" in answer: \n",
    "                            t+=1\n",
    "                        else:    \n",
    "                            t66.append(ref1)\n",
    "                    else: # factual\n",
    "                        if \"here are no conflicting parts\" in answer: \n",
    "                            f+=1\n",
    "                        else:    \n",
    "                            f66.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\".\")[0]\n",
    "                    c2=ref1.strip().split(\".\")[0]\n",
    "                    if c1.strip()==c2.strip():\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t66.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f66.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\".\")\n",
    "                    c2=ref1.strip().split(\".\")\n",
    "                    if len(c1)>2 and len(c2)>2 and c1[-2]==c2[-2]:\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t66.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f66.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\"\\\\\\\\\")[0]\n",
    "                    c2=ref1.strip().split(\"\\\\\\\\\")[0]\n",
    "                    if c1==c2:\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t66.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f66.append(ref1)\n",
    "                if flag==False:\n",
    "                    uni=ref.strip().split('\\\\u')[-1][4:]\n",
    "                    length=len(uni)\n",
    "                    if ref1.strip()[-length:] == uni:\n",
    "                        flag = True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t66.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f66.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            print(ref1)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 174,
   "id": "6d92101f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "98 0.5632183908045977\n",
      "52 0.2988505747126437\n",
      "matched num= 348\n"
     ]
    }
   ],
   "source": [
    "t=0\n",
    "f=0\n",
    "t7=[]\n",
    "f7=[]\n",
    "n=0\n",
    "with open(\"llama_data/climate+0.1/detections_7.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        with open(\"llama_data/climate+0.1/dataset_0.1.jsonl\") as file1:\n",
    "            for line1 in file1:\n",
    "                label=line1.split(\"label: \")[-1]\n",
    "                ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "                if len(ref1)==3:\n",
    "                    ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "                else:\n",
    "                    ref1=ref1[-1]\n",
    "                ref1=ref1[:-8]\n",
    "                if ref.strip()==ref1.strip():\n",
    "                    flag=True\n",
    "                    n+=1\n",
    "                    if 'true' in label: # hallucination\n",
    "                        if \"here are no conflicting parts\" in answer: \n",
    "                            t+=1\n",
    "                        else:    \n",
    "                            t7.append(ref1)\n",
    "                    else: # factual\n",
    "                        if \"here are no conflicting parts\" in answer: \n",
    "                            f+=1\n",
    "                        else:    \n",
    "                            f7.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\".\")[0]\n",
    "                    c2=ref1.strip().split(\".\")[0]\n",
    "                    if c1.strip()==c2.strip():\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t7.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f7.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\".\")\n",
    "                    c2=ref1.strip().split(\".\")\n",
    "                    if len(c1)>2 and len(c2)>2 and c1[-2]==c2[-2]:\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t7.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f7.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\"\\\\\\\\\")[0]\n",
    "                    c2=ref1.strip().split(\"\\\\\\\\\")[0]\n",
    "                    if c1==c2:\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t7.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f7.append(ref1)\n",
    "                if flag==False:\n",
    "                    uni=ref.strip().split('\\\\u')[-1][4:]\n",
    "                    length=len(uni)\n",
    "                    if ref1.strip()[-length:] == uni:\n",
    "                        flag = True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t7.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f7.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            print(ref1)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 272,
   "id": "681c5d21",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "348\n",
      "5.505747126436781\n",
      "5.5344827586206895\n"
     ]
    }
   ],
   "source": [
    "with open('llama_data/climate+0.1/dataset_0.1.jsonl') as file1: \n",
    "    fnum=[]\n",
    "    tnum=[]\n",
    "    num=0\n",
    "    for line1 in file1:\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        ref1=ref1[:-8]\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        num+=1\n",
    "        if 'true' in label: # factual\n",
    "            num2=0\n",
    "            for t in tlist:\n",
    "                if ref1 in t:\n",
    "                    num2+=1\n",
    "            tnum.append(num2)\n",
    "            #print(num,num2) \n",
    "        else:\n",
    "            num1=0\n",
    "            for f in flist:\n",
    "                if ref1 in f:\n",
    "                    num1+=1\n",
    "            fnum.append(num1)\n",
    "            #print(num,num1)\n",
    "    print(num)\n",
    "    print(np.mean(fnum))\n",
    "    print(np.mean(tnum))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 186,
   "id": "b2ea545a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 1 2\n",
      "0.5086206896551724\n",
      "0.6122448979591837\n"
     ]
    }
   ],
   "source": [
    "# Leave-one-out search over the 13 detection runs: score the union of every\n",
    "# 12-run subset and print each subset that ties or beats the best accuracy so far.\n",
    "from itertools import combinations\n",
    "\n",
    "tlist=[t1,t2,t3,t4,t5,t6,t7,t11,t22,t33,t44,t55,t66]\n",
    "flist=[f1,f2,f3,f4,f5,f6,f7,f11,f22,f33,f44,f55,f66]\n",
    "#      0   1  2  3  4  5  6  7   8   9   10  11  12\n",
    "subset_size=12\n",
    "minV=0\n",
    "# combinations() yields index tuples in lexicographic order, the same order the\n",
    "# hand-written nested loops used. Those loops bounded the 10th index with 13-3\n",
    "# instead of 13-2, which silently skipped most of the 13 possible subsets.\n",
    "for combo in combinations(range(len(tlist)),subset_size):\n",
    "    # A sample counts as flagged when at least one selected run flagged it.\n",
    "    aa=len(set().union(*(tlist[idx] for idx in combo)))  # flagged hallucinated samples\n",
    "    bb=len(set().union(*(flist[idx] for idx in combo)))  # flagged factual samples\n",
    "    # num is the per-class sample count, so num*2 is the size of the whole set.\n",
    "    TN=(num-bb)/(num*2)\n",
    "    TP=aa/(num*2)\n",
    "    FN=(num-aa)/(num*2)\n",
    "    FP=bb/(num*2)\n",
    "    F1=2*TP/(2*TP+FP+FN)\n",
    "    if TN+TP>=minV:  # >= so that ties with the current best are reported as well\n",
    "        print(*combo)  # positions in tlist/flist of the runs kept in this subset\n",
    "        print(TN+TP)\n",
    "        print(F1)\n",
    "        minV=TN+TP"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 187,
   "id": "f5fd2a29",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "135\n",
      "0.7758620689655172\n"
     ]
    }
   ],
   "source": [
    "# Hallucinated references flagged by at least one of the 13 runs, duplicates collapsed.\n",
    "a=[*t1,*t2,*t3,*t4,*t5,*t6,*t7,*t11,*t22,*t33,*t44,*t55,*t66]\n",
    "res=set(a)\n",
    "a=list(res)\n",
    "aa=len(res)  # number of distinct flagged hallucinated references\n",
    "print(aa)\n",
    "print(aa/num)  # share of the num hallucinated samples that were flagged"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 188,
   "id": "a1609f3b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "133\n",
      "0.23563218390804597\n"
     ]
    }
   ],
   "source": [
    "# Factual references flagged by at least one of the 13 runs, duplicates collapsed.\n",
    "a=[*f1,*f2,*f3,*f4,*f5,*f6,*f7,*f11,*f22,*f33,*f44,*f55,*f66]\n",
    "res=set(a)\n",
    "a=list(res)\n",
    "bb=len(res)  # number of distinct flagged factual references\n",
    "print(bb)\n",
    "print((num-bb)/num)  # share of the num factual samples that were left unflagged"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 189,
   "id": "398bd98b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "FN 0.11206896551724138\n",
      "FP 0.382183908045977\n",
      "TN 0.11781609195402298\n",
      "TP 0.3879310344827586\n"
     ]
    }
   ],
   "source": [
    "# Confusion-matrix cells as shares of the whole set (num samples per class, two classes).\n",
    "# aa / bb are the flagged hallucinated / factual counts computed in the cells above.\n",
    "all_num=num*2\n",
    "FN=(num-aa)/all_num  # hallucinated, not flagged\n",
    "FP=bb/all_num        # factual, flagged\n",
    "TN=(num-bb)/all_num  # factual, not flagged\n",
    "TP=aa/all_num        # hallucinated, flagged\n",
    "for tag,share in (('FN',FN),('FP',FP),('TN',TN),('TP',TP)):\n",
    "    print(tag,share)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 190,
   "id": "59d17b74",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.5057471264367817\n",
      "0.503731343283582\n",
      "0.7758620689655172\n",
      "0.6108597285067873\n"
     ]
    }
   ],
   "source": [
    "# Summary metrics derived from the confusion-matrix shares computed above.\n",
    "# Printed one per line in the order: accuracy, precision, recall, F1.\n",
    "Accuracy=TP+TN\n",
    "print(Accuracy)\n",
    "\n",
    "Precision=TP/(FP+TP)\n",
    "print(Precision)\n",
    "\n",
    "Recall=TP/(FN+TP)\n",
    "print(Recall)\n",
    "\n",
    "# Same formula as in the subset search: 2TP / (2TP + FP + FN).\n",
    "F1=(2*TP)/(2*TP+FP+FN)\n",
    "print(F1)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "cb2e7d37",
   "metadata": {},
   "source": [
    "# pubhealth+0.1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 209,
   "id": "badc4472",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Samples per class for pubhealth+0.1 (the matching cells in this section report 432 matched rows in total).\n",
    "num=432//2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 210,
   "id": "6fd46a75",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "214 0.9907407407407407\n",
      "4 0.018518518518518517\n",
      "matched num= 432\n"
     ]
    }
   ],
   "source": [
    "# Score run detections_1 (pubhealth+0.1) against the labelled dataset.\n",
    "#   t / f   : hallucinated / factual samples the detector answered with 'no conflicting parts'\n",
    "#   t1 / f1 : references of hallucinated / factual samples the detector flagged\n",
    "#   n       : number of (detection line, dataset row) matches\n",
    "t=0\n",
    "f=0\n",
    "t1=[]\n",
    "f1=[]\n",
    "n=0\n",
    "# Parse the dataset once; it used to be re-opened and re-parsed for every detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/pubhealth+0.1/dataset_0.1.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        parts=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(parts)==3:\n",
    "            # The reference text itself contained 'Reference: '; stitch it back together.\n",
    "            ref1=parts[1]+\"Reference: \"+parts[2]\n",
    "        else:\n",
    "            ref1=parts[-1]\n",
    "        # Drop the 8 trailing characters after the reference (presumably prompt residue; TODO confirm).\n",
    "        dataset.append((ref1[:-8],label))\n",
    "with open(\"llama_data/pubhealth+0.1/detections_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        key=ref.strip()\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for ref1,label in dataset:\n",
    "            cand=ref1.strip()\n",
    "            # An exact match always counts, even after an earlier row already matched.\n",
    "            matched=key==cand\n",
    "            if not matched and not flag:\n",
    "                # Fuzzy fallbacks, only tried until this detection line gets its first match.\n",
    "                s1=key.split(\".\")\n",
    "                s2=cand.split(\".\")\n",
    "                # Text after the last backslash-u escape; [4:] skips what are presumably its 4 hex digits.\n",
    "                # The separator needs an escaped backslash: a bare backslash-u literal is a SyntaxError.\n",
    "                tail=key.split('\\\\u')[-1][4:]\n",
    "                matched=(\n",
    "                    s1[0].strip()==s2[0].strip()  # same text up to the first '.'\n",
    "                    or (len(s1)>2 and len(s2)>2 and s1[-2]==s2[-2])  # same second-to-last '.'-separated piece\n",
    "                    or key.split(\"\\\\\\\\\")[0]==cand.split(\"\\\\\\\\\")[0]  # same text before the first double backslash\n",
    "                    or cand[-len(tail):]==tail  # same ending after the last unicode escape\n",
    "                )\n",
    "            if matched:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                if 'true' in label: # hallucination\n",
    "                    if no_conflict:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t1.append(ref1)\n",
    "                else: # factual\n",
    "                    if no_conflict:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f1.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # Zero or several matches: print the detection reference at fault\n",
    "            # (ref1 would only ever be the last dataset row at this point).\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 211,
   "id": "904315f3",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "212 0.9814814814814815\n",
      "4 0.018518518518518517\n",
      "matched num= 432\n"
     ]
    }
   ],
   "source": [
    "# Score run detections_1_1 (pubhealth+0.1) against the labelled dataset.\n",
    "#   t / f     : hallucinated / factual samples the detector answered with 'no conflicting parts'\n",
    "#   t11 / f11 : references of hallucinated / factual samples the detector flagged\n",
    "#   n         : number of (detection line, dataset row) matches\n",
    "t=0\n",
    "f=0\n",
    "t11=[]\n",
    "f11=[]\n",
    "n=0\n",
    "# Parse the dataset once; it used to be re-opened and re-parsed for every detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/pubhealth+0.1/dataset_0.1.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        parts=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(parts)==3:\n",
    "            # The reference text itself contained 'Reference: '; stitch it back together.\n",
    "            ref1=parts[1]+\"Reference: \"+parts[2]\n",
    "        else:\n",
    "            ref1=parts[-1]\n",
    "        # Drop the 8 trailing characters after the reference (presumably prompt residue; TODO confirm).\n",
    "        dataset.append((ref1[:-8],label))\n",
    "with open(\"llama_data/pubhealth+0.1/detections_1_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        key=ref.strip()\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for ref1,label in dataset:\n",
    "            cand=ref1.strip()\n",
    "            # An exact match always counts, even after an earlier row already matched.\n",
    "            matched=key==cand\n",
    "            if not matched and not flag:\n",
    "                # Fuzzy fallbacks, only tried until this detection line gets its first match.\n",
    "                s1=key.split(\".\")\n",
    "                s2=cand.split(\".\")\n",
    "                # Text after the last backslash-u escape; [4:] skips what are presumably its 4 hex digits.\n",
    "                # The separator needs an escaped backslash: a bare backslash-u literal is a SyntaxError.\n",
    "                tail=key.split('\\\\u')[-1][4:]\n",
    "                matched=(\n",
    "                    s1[0].strip()==s2[0].strip()  # same text up to the first '.'\n",
    "                    or (len(s1)>2 and len(s2)>2 and s1[-2]==s2[-2])  # same second-to-last '.'-separated piece\n",
    "                    or key.split(\"\\\\\\\\\")[0]==cand.split(\"\\\\\\\\\")[0]  # same text before the first double backslash\n",
    "                    or cand[-len(tail):]==tail  # same ending after the last unicode escape\n",
    "                )\n",
    "            if matched:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                if 'true' in label: # hallucination\n",
    "                    if no_conflict:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t11.append(ref1)\n",
    "                else: # factual\n",
    "                    if no_conflict:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f11.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # Zero or several matches: print the detection reference at fault\n",
    "            # (ref1 would only ever be the last dataset row at this point).\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 212,
   "id": "962d1c52",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "211 0.9768518518518519\n",
      "4 0.018518518518518517\n",
      "matched num= 432\n"
     ]
    }
   ],
   "source": [
    "# Score run detections_2 (pubhealth+0.1) against the labelled dataset.\n",
    "#   t / f   : hallucinated / factual samples the detector answered with 'no conflicting parts'\n",
    "#   t2 / f2 : references of hallucinated / factual samples the detector flagged\n",
    "#   n       : number of (detection line, dataset row) matches\n",
    "t=0\n",
    "f=0\n",
    "t2=[]\n",
    "f2=[]\n",
    "n=0\n",
    "# Parse the dataset once; it used to be re-opened and re-parsed for every detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/pubhealth+0.1/dataset_0.1.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        parts=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(parts)==3:\n",
    "            # The reference text itself contained 'Reference: '; stitch it back together.\n",
    "            ref1=parts[1]+\"Reference: \"+parts[2]\n",
    "        else:\n",
    "            ref1=parts[-1]\n",
    "        # Drop the 8 trailing characters after the reference (presumably prompt residue; TODO confirm).\n",
    "        dataset.append((ref1[:-8],label))\n",
    "with open(\"llama_data/pubhealth+0.1/detections_2.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        key=ref.strip()\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for ref1,label in dataset:\n",
    "            cand=ref1.strip()\n",
    "            # An exact match always counts, even after an earlier row already matched.\n",
    "            matched=key==cand\n",
    "            if not matched and not flag:\n",
    "                # Fuzzy fallbacks, only tried until this detection line gets its first match.\n",
    "                s1=key.split(\".\")\n",
    "                s2=cand.split(\".\")\n",
    "                # Text after the last backslash-u escape; [4:] skips what are presumably its 4 hex digits.\n",
    "                # The separator needs an escaped backslash: a bare backslash-u literal is a SyntaxError.\n",
    "                tail=key.split('\\\\u')[-1][4:]\n",
    "                matched=(\n",
    "                    s1[0].strip()==s2[0].strip()  # same text up to the first '.'\n",
    "                    or (len(s1)>2 and len(s2)>2 and s1[-2]==s2[-2])  # same second-to-last '.'-separated piece\n",
    "                    or key.split(\"\\\\\\\\\")[0]==cand.split(\"\\\\\\\\\")[0]  # same text before the first double backslash\n",
    "                    or cand[-len(tail):]==tail  # same ending after the last unicode escape\n",
    "                )\n",
    "            if matched:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                if 'true' in label: # hallucination\n",
    "                    if no_conflict:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t2.append(ref1)\n",
    "                else: # factual\n",
    "                    if no_conflict:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f2.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # Zero or several matches: print the detection reference at fault\n",
    "            # (ref1 would only ever be the last dataset row at this point).\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 213,
   "id": "38717c9c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "202 0.9351851851851852\n",
      "18 0.08333333333333333\n",
      "matched num= 432\n"
     ]
    }
   ],
   "source": [
    "# Score run detections_2_1 (pubhealth+0.1) against the labelled dataset.\n",
    "#   t / f     : hallucinated / factual samples the detector answered with 'no conflicting parts'\n",
    "#   t22 / f22 : references of hallucinated / factual samples the detector flagged\n",
    "#   n         : number of (detection line, dataset row) matches\n",
    "t=0\n",
    "f=0\n",
    "t22=[]\n",
    "f22=[]\n",
    "n=0\n",
    "# Parse the dataset once; it used to be re-opened and re-parsed for every detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/pubhealth+0.1/dataset_0.1.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        parts=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(parts)==3:\n",
    "            # The reference text itself contained 'Reference: '; stitch it back together.\n",
    "            ref1=parts[1]+\"Reference: \"+parts[2]\n",
    "        else:\n",
    "            ref1=parts[-1]\n",
    "        # Drop the 8 trailing characters after the reference (presumably prompt residue; TODO confirm).\n",
    "        dataset.append((ref1[:-8],label))\n",
    "with open(\"llama_data/pubhealth+0.1/detections_2_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        key=ref.strip()\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for ref1,label in dataset:\n",
    "            cand=ref1.strip()\n",
    "            # An exact match always counts, even after an earlier row already matched.\n",
    "            matched=key==cand\n",
    "            if not matched and not flag:\n",
    "                # Fuzzy fallbacks, only tried until this detection line gets its first match.\n",
    "                s1=key.split(\".\")\n",
    "                s2=cand.split(\".\")\n",
    "                # Text after the last backslash-u escape; [4:] skips what are presumably its 4 hex digits.\n",
    "                # The separator needs an escaped backslash: a bare backslash-u literal is a SyntaxError.\n",
    "                tail=key.split('\\\\u')[-1][4:]\n",
    "                matched=(\n",
    "                    s1[0].strip()==s2[0].strip()  # same text up to the first '.'\n",
    "                    or (len(s1)>2 and len(s2)>2 and s1[-2]==s2[-2])  # same second-to-last '.'-separated piece\n",
    "                    or key.split(\"\\\\\\\\\")[0]==cand.split(\"\\\\\\\\\")[0]  # same text before the first double backslash\n",
    "                    or cand[-len(tail):]==tail  # same ending after the last unicode escape\n",
    "                )\n",
    "            if matched:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                if 'true' in label: # hallucination\n",
    "                    if no_conflict:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t22.append(ref1)\n",
    "                else: # factual\n",
    "                    if no_conflict:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f22.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # Zero or several matches: print the detection reference at fault\n",
    "            # (ref1 would only ever be the last dataset row at this point).\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 214,
   "id": "1152029f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "212 0.9814814814814815\n",
      "1 0.004629629629629629\n",
      "matched num= 432\n"
     ]
    }
   ],
   "source": [
    "# Score run detections_3 (pubhealth+0.1) against the labelled dataset.\n",
    "#   t / f   : hallucinated / factual samples the detector answered with 'no conflicting parts'\n",
    "#   t3 / f3 : references of hallucinated / factual samples the detector flagged\n",
    "#   n       : number of (detection line, dataset row) matches\n",
    "t=0\n",
    "f=0\n",
    "t3=[]\n",
    "f3=[]\n",
    "n=0\n",
    "# Parse the dataset once; it used to be re-opened and re-parsed for every detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/pubhealth+0.1/dataset_0.1.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        parts=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(parts)==3:\n",
    "            # The reference text itself contained 'Reference: '; stitch it back together.\n",
    "            ref1=parts[1]+\"Reference: \"+parts[2]\n",
    "        else:\n",
    "            ref1=parts[-1]\n",
    "        # Drop the 8 trailing characters after the reference (presumably prompt residue; TODO confirm).\n",
    "        dataset.append((ref1[:-8],label))\n",
    "with open(\"llama_data/pubhealth+0.1/detections_3.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        key=ref.strip()\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for ref1,label in dataset:\n",
    "            cand=ref1.strip()\n",
    "            # An exact match always counts, even after an earlier row already matched.\n",
    "            matched=key==cand\n",
    "            if not matched and not flag:\n",
    "                # Fuzzy fallbacks, only tried until this detection line gets its first match.\n",
    "                s1=key.split(\".\")\n",
    "                s2=cand.split(\".\")\n",
    "                # Text after the last backslash-u escape; [4:] skips what are presumably its 4 hex digits.\n",
    "                # The separator needs an escaped backslash: a bare backslash-u literal is a SyntaxError.\n",
    "                tail=key.split('\\\\u')[-1][4:]\n",
    "                matched=(\n",
    "                    s1[0].strip()==s2[0].strip()  # same text up to the first '.'\n",
    "                    or (len(s1)>2 and len(s2)>2 and s1[-2]==s2[-2])  # same second-to-last '.'-separated piece\n",
    "                    or key.split(\"\\\\\\\\\")[0]==cand.split(\"\\\\\\\\\")[0]  # same text before the first double backslash\n",
    "                    or cand[-len(tail):]==tail  # same ending after the last unicode escape\n",
    "                )\n",
    "            if matched:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                if 'true' in label: # hallucination\n",
    "                    if no_conflict:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t3.append(ref1)\n",
    "                else: # factual\n",
    "                    if no_conflict:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f3.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # Zero or several matches: print the detection reference at fault\n",
    "            # (ref1 would only ever be the last dataset row at this point).\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 215,
   "id": "0d6ff6e1",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "205 0.9490740740740741\n",
      "14 0.06481481481481481\n",
      "matched num= 432\n"
     ]
    }
   ],
   "source": [
    "t=0\n",
    "f=0\n",
    "t33=[]\n",
    "f33=[]\n",
    "n=0\n",
    "with open(\"llama_data/pubhealth+0.1/detections_3_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        with open(\"llama_data/pubhealth+0.1/dataset_0.1.jsonl\") as file1:\n",
    "            for line1 in file1:\n",
    "                label=line1.split(\"label: \")[-1]\n",
    "                ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "                if len(ref1)==3:\n",
    "                    ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "                else:\n",
    "                    ref1=ref1[-1]\n",
    "                ref1=ref1[:-8]\n",
    "                if ref.strip()==ref1.strip():\n",
    "                    flag=True\n",
    "                    n+=1\n",
    "                    if 'true' in label: # hallucination\n",
    "                        if \"here are no conflicting parts\" in answer: \n",
    "                            t+=1\n",
    "                        else:    \n",
    "                            t33.append(ref1)\n",
    "                    else: # factual\n",
    "                        if \"here are no conflicting parts\" in answer: \n",
    "                            f+=1\n",
    "                        else:    \n",
    "                            f33.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\".\")[0]\n",
    "                    c2=ref1.strip().split(\".\")[0]\n",
    "                    if c1.strip()==c2.strip():\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t33.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f33.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\".\")\n",
    "                    c2=ref1.strip().split(\".\")\n",
    "                    if len(c1)>2 and len(c2)>2 and c1[-2]==c2[-2]:\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t33.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f33.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\"\\\\\\\\\")[0]\n",
    "                    c2=ref1.strip().split(\"\\\\\\\\\")[0]\n",
    "                    if c1==c2:\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t33.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f33.append(ref1)\n",
    "                if flag==False:\n",
    "                    uni=ref.strip().split('\\\\u')[-1][4:]\n",
    "                    length=len(uni)\n",
    "                    if ref1.strip()[-length:] == uni:\n",
    "                        flag = True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t33.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f33.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            print(ref1)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 216,
   "id": "65f35bdc",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "209 0.9675925925925926\n",
      "5 0.023148148148148147\n",
      "matched num= 432\n"
     ]
    }
   ],
   "source": [
    "t=0\n",
    "f=0\n",
    "t4=[]\n",
    "f4=[]\n",
    "n=0\n",
    "with open(\"llama_data/pubhealth+0.1/detections_4.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        with open(\"llama_data/pubhealth+0.1/dataset_0.1.jsonl\") as file1:\n",
    "            for line1 in file1:\n",
    "                label=line1.split(\"label: \")[-1]\n",
    "                ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "                if len(ref1)==3:\n",
    "                    ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "                else:\n",
    "                    ref1=ref1[-1]\n",
    "                ref1=ref1[:-8]\n",
    "                if ref.strip()==ref1.strip():\n",
    "                    flag=True\n",
    "                    n+=1\n",
    "                    if 'true' in label: # hallucination\n",
    "                        if \"here are no conflicting parts\" in answer: \n",
    "                            t+=1\n",
    "                        else:    \n",
    "                            t4.append(ref1)\n",
    "                    else: # factual\n",
    "                        if \"here are no conflicting parts\" in answer: \n",
    "                            f+=1\n",
    "                        else:    \n",
    "                            f4.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\".\")[0]\n",
    "                    c2=ref1.strip().split(\".\")[0]\n",
    "                    if c1.strip()==c2.strip():\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t4.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f4.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\".\")\n",
    "                    c2=ref1.strip().split(\".\")\n",
    "                    if len(c1)>2 and len(c2)>2 and c1[-2]==c2[-2]:\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t4.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f4.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\"\\\\\\\\\")[0]\n",
    "                    c2=ref1.strip().split(\"\\\\\\\\\")[0]\n",
    "                    if c1==c2:\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t4.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f4.append(ref1)\n",
    "                if flag==False:\n",
    "                    uni=ref.strip().split('\\\\u')[-1][4:]\n",
    "                    length=len(uni)\n",
    "                    if ref1.strip()[-length:] == uni:\n",
    "                        flag = True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t4.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f4.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            print(ref1)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 217,
   "id": "92ed9bef",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "215 0.9953703703703703\n",
      "5 0.023148148148148147\n",
      "matched num= 432\n"
     ]
    }
   ],
   "source": [
    "t=0\n",
    "f=0\n",
    "t44=[]\n",
    "f44=[]\n",
    "n=0\n",
    "with open(\"llama_data/pubhealth+0.1/detections_4_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        with open(\"llama_data/pubhealth+0.1/dataset_0.1.jsonl\") as file1:\n",
    "            for line1 in file1:\n",
    "                label=line1.split(\"label: \")[-1]\n",
    "                ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "                if len(ref1)==3:\n",
    "                    ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "                else:\n",
    "                    ref1=ref1[-1]\n",
    "                ref1=ref1[:-8]\n",
    "                if ref.strip()==ref1.strip():\n",
    "                    flag=True\n",
    "                    n+=1\n",
    "                    if 'true' in label: # hallucination\n",
    "                        if \"here are no conflicting parts\" in answer: \n",
    "                            t+=1\n",
    "                        else:    \n",
    "                            t44.append(ref1)\n",
    "                    else: # factual\n",
    "                        if \"here are no conflicting parts\" in answer: \n",
    "                            f+=1\n",
    "                        else:    \n",
    "                            f44.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\".\")[0]\n",
    "                    c2=ref1.strip().split(\".\")[0]\n",
    "                    if c1.strip()==c2.strip():\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t44.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f44.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\".\")\n",
    "                    c2=ref1.strip().split(\".\")\n",
    "                    if len(c1)>2 and len(c2)>2 and c1[-2]==c2[-2]:\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t44.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f44.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\"\\\\\\\\\")[0]\n",
    "                    c2=ref1.strip().split(\"\\\\\\\\\")[0]\n",
    "                    if c1==c2:\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t44.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f44.append(ref1)\n",
    "                if flag==False:\n",
    "                    uni=ref.strip().split('\\\\u')[-1][4:]\n",
    "                    length=len(uni)\n",
    "                    if ref1.strip()[-length:] == uni:\n",
    "                        flag = True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t44.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f44.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            print(ref1)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 218,
   "id": "a47b886a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "214 0.9907407407407407\n",
      "6 0.027777777777777776\n",
      "matched num= 432\n"
     ]
    }
   ],
   "source": [
    "t=0\n",
    "f=0\n",
    "t5=[]\n",
    "f5=[]\n",
    "n=0\n",
    "with open(\"llama_data/pubhealth+0.1/detections_5.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        with open(\"llama_data/pubhealth+0.1/dataset_0.1.jsonl\") as file1:\n",
    "            for line1 in file1:\n",
    "                label=line1.split(\"label: \")[-1]\n",
    "                ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "                if len(ref1)==3:\n",
    "                    ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "                else:\n",
    "                    ref1=ref1[-1]\n",
    "                ref1=ref1[:-8]\n",
    "                if ref.strip()==ref1.strip():\n",
    "                    flag=True\n",
    "                    n+=1\n",
    "                    if 'true' in label: # hallucination\n",
    "                        if \"here are no conflicting parts\" in answer: \n",
    "                            t+=1\n",
    "                        else:    \n",
    "                            t5.append(ref1)\n",
    "                    else: # factual\n",
    "                        if \"here are no conflicting parts\" in answer: \n",
    "                            f+=1\n",
    "                        else:    \n",
    "                            f5.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\".\")[0]\n",
    "                    c2=ref1.strip().split(\".\")[0]\n",
    "                    if c1.strip()==c2.strip():\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t5.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f5.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\".\")\n",
    "                    c2=ref1.strip().split(\".\")\n",
    "                    if len(c1)>2 and len(c2)>2 and c1[-2]==c2[-2]:\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t5.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f5.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\"\\\\\\\\\")[0]\n",
    "                    c2=ref1.strip().split(\"\\\\\\\\\")[0]\n",
    "                    if c1==c2:\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t5.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f5.append(ref1)\n",
    "                if flag==False:\n",
    "                    uni=ref.strip().split('\\\\u')[-1][4:]\n",
    "                    length=len(uni)\n",
    "                    if ref1.strip()[-length:] == uni:\n",
    "                        flag = True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t5.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f5.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            print(ref1)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 219,
   "id": "baa5fff0",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "214 0.9907407407407407\n",
      "10 0.046296296296296294\n",
      "matched num= 432\n"
     ]
    }
   ],
   "source": [
    "t=0\n",
    "f=0\n",
    "t55=[]\n",
    "f55=[]\n",
    "n=0\n",
    "with open(\"llama_data/pubhealth+0.1/detections_5_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        with open(\"llama_data/pubhealth+0.1/dataset_0.1.jsonl\") as file1:\n",
    "            for line1 in file1:\n",
    "                label=line1.split(\"label: \")[-1]\n",
    "                ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "                if len(ref1)==3:\n",
    "                    ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "                else:\n",
    "                    ref1=ref1[-1]\n",
    "                ref1=ref1[:-8]\n",
    "                if ref.strip()==ref1.strip():\n",
    "                    flag=True\n",
    "                    n+=1\n",
    "                    if 'true' in label: # hallucination\n",
    "                        if \"here are no conflicting parts\" in answer: \n",
    "                            t+=1\n",
    "                        else:    \n",
    "                            t55.append(ref1)\n",
    "                    else: # factual\n",
    "                        if \"here are no conflicting parts\" in answer: \n",
    "                            f+=1\n",
    "                        else:    \n",
    "                            f55.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\".\")[0]\n",
    "                    c2=ref1.strip().split(\".\")[0]\n",
    "                    if c1.strip()==c2.strip():\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t55.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f55.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\".\")\n",
    "                    c2=ref1.strip().split(\".\")\n",
    "                    if len(c1)>2 and len(c2)>2 and c1[-2]==c2[-2]:\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t55.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f55.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\"\\\\\\\\\")[0]\n",
    "                    c2=ref1.strip().split(\"\\\\\\\\\")[0]\n",
    "                    if c1==c2:\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t55.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f55.append(ref1)\n",
    "                if flag==False:\n",
    "                    uni=ref.strip().split('\\\\u')[-1][4:]\n",
    "                    length=len(uni)\n",
    "                    if ref1.strip()[-length:] == uni:\n",
    "                        flag = True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t55.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f55.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            print(ref1)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 220,
   "id": "8f3b939b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "207 0.9583333333333334\n",
      "15 0.06944444444444445\n",
      "matched num= 432\n"
     ]
    }
   ],
   "source": [
    "t=0\n",
    "f=0\n",
    "t6=[]\n",
    "f6=[]\n",
    "n=0\n",
    "with open(\"llama_data/pubhealth+0.1/detections_6.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        with open(\"llama_data/pubhealth+0.1/dataset_0.1.jsonl\") as file1:\n",
    "            for line1 in file1:\n",
    "                label=line1.split(\"label: \")[-1]\n",
    "                ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "                if len(ref1)==3:\n",
    "                    ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "                else:\n",
    "                    ref1=ref1[-1]\n",
    "                ref1=ref1[:-8]\n",
    "                if ref.strip()==ref1.strip():\n",
    "                    flag=True\n",
    "                    n+=1\n",
    "                    if 'true' in label: # hallucination\n",
    "                        if \"here are no conflicting parts\" in answer: \n",
    "                            t+=1\n",
    "                        else:    \n",
    "                            t6.append(ref1)\n",
    "                    else: # factual\n",
    "                        if \"here are no conflicting parts\" in answer: \n",
    "                            f+=1\n",
    "                        else:    \n",
    "                            f6.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\".\")[0]\n",
    "                    c2=ref1.strip().split(\".\")[0]\n",
    "                    if c1.strip()==c2.strip():\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t6.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f6.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\".\")\n",
    "                    c2=ref1.strip().split(\".\")\n",
    "                    if len(c1)>2 and len(c2)>2 and c1[-2]==c2[-2]:\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t6.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f6.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\"\\\\\\\\\")[0]\n",
    "                    c2=ref1.strip().split(\"\\\\\\\\\")[0]\n",
    "                    if c1==c2:\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t6.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f6.append(ref1)\n",
    "                if flag==False:\n",
    "                    uni=ref.strip().split('\\\\u')[-1][4:]\n",
    "                    length=len(uni)\n",
    "                    if ref1.strip()[-length:] == uni:\n",
    "                        flag = True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t6.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f6.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            print(ref1)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 221,
   "id": "bbc0ad50",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "209 0.9675925925925926\n",
      "13 0.06018518518518518\n",
      "matched num= 432\n"
     ]
    }
   ],
   "source": [
    "t=0\n",
    "f=0\n",
    "t66=[]\n",
    "f66=[]\n",
    "n=0\n",
    "with open(\"llama_data/pubhealth+0.1/detections_6_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        with open(\"llama_data/pubhealth+0.1/dataset_0.1.jsonl\") as file1:\n",
    "            for line1 in file1:\n",
    "                label=line1.split(\"label: \")[-1]\n",
    "                ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "                if len(ref1)==3:\n",
    "                    ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "                else:\n",
    "                    ref1=ref1[-1]\n",
    "                ref1=ref1[:-8]\n",
    "                if ref.strip()==ref1.strip():\n",
    "                    flag=True\n",
    "                    n+=1\n",
    "                    if 'true' in label: # hallucination\n",
    "                        if \"here are no conflicting parts\" in answer: \n",
    "                            t+=1\n",
    "                        else:    \n",
    "                            t66.append(ref1)\n",
    "                    else: # factual\n",
    "                        if \"here are no conflicting parts\" in answer: \n",
    "                            f+=1\n",
    "                        else:    \n",
    "                            f66.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\".\")[0]\n",
    "                    c2=ref1.strip().split(\".\")[0]\n",
    "                    if c1.strip()==c2.strip():\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t66.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f66.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\".\")\n",
    "                    c2=ref1.strip().split(\".\")\n",
    "                    if len(c1)>2 and len(c2)>2 and c1[-2]==c2[-2]:\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t66.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f66.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\"\\\\\\\\\")[0]\n",
    "                    c2=ref1.strip().split(\"\\\\\\\\\")[0]\n",
    "                    if c1==c2:\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t66.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f66.append(ref1)\n",
    "                if flag==False:\n",
    "                    uni=ref.strip().split('\\\\u')[-1][4:]\n",
    "                    length=len(uni)\n",
    "                    if ref1.strip()[-length:] == uni:\n",
    "                        flag = True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t66.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f66.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            print(ref1)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 227,
   "id": "92201417",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "212 0.9814814814814815\n",
      "4 0.018518518518518517\n",
      "matched num= 432\n"
     ]
    }
   ],
   "source": [
    "t=0\n",
    "f=0\n",
    "t7=[]\n",
    "f7=[]\n",
    "n=0\n",
    "with open(\"llama_data/pubhealth+0.1/detections_7.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        with open(\"llama_data/pubhealth+0.1/dataset_0.1.jsonl\") as file1:\n",
    "            for line1 in file1:\n",
    "                label=line1.split(\"label: \")[-1]\n",
    "                ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "                if len(ref1)==3:\n",
    "                    ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "                else:\n",
    "                    ref1=ref1[-1]\n",
    "                ref1=ref1[:-8]\n",
    "                if ref.strip()==ref1.strip():\n",
    "                    flag=True\n",
    "                    n+=1\n",
    "                    if 'true' in label: # hallucination\n",
    "                        if \"here are no conflicting parts\" in answer: \n",
    "                            t+=1\n",
    "                        else:    \n",
    "                            t7.append(ref1)\n",
    "                    else: # factual\n",
    "                        if \"here are no conflicting parts\" in answer: \n",
    "                            f+=1\n",
    "                        else:    \n",
    "                            f7.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\".\")[0]\n",
    "                    c2=ref1.strip().split(\".\")[0]\n",
    "                    if c1.strip()==c2.strip():\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t7.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f7.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\".\")\n",
    "                    c2=ref1.strip().split(\".\")\n",
    "                    if len(c1)>2 and len(c2)>2 and c1[-2]==c2[-2]:\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t7.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f7.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\"\\\\\\\\\")[0]\n",
    "                    c2=ref1.strip().split(\"\\\\\\\\\")[0]\n",
    "                    if c1==c2:\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t7.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f7.append(ref1)\n",
    "                if flag==False:\n",
    "                    uni=ref.strip().split('\\\\u')[-1][4:]\n",
    "                    length=len(uni)\n",
    "                    if ref1.strip()[-length:] == uni:\n",
    "                        flag = True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t7.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f7.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            print(ref1)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 228,
   "id": "51761682",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 1 2 12\n",
      "0.5231481481481481\n",
      "0.6578073089700996\n",
      "0 1 3 12\n",
      "0.5231481481481481\n",
      "0.6578073089700996\n",
      "0 1 4 12\n",
      "0.5254629629629629\n",
      "0.6600331674958541\n",
      "0 1 9 12\n",
      "0.5254629629629629\n",
      "0.6600331674958541\n",
      "0 1 11 12\n",
      "0.5254629629629629\n",
      "0.6600331674958541\n",
      "0 2 3 12\n",
      "0.5254629629629629\n",
      "0.6589018302828619\n",
      "0 2 4 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "0 2 9 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "0 2 11 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "0 3 4 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "0 3 9 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "0 3 11 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "0 4 5 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "0 4 6 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "0 4 7 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "0 4 8 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "0 4 9 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "0 4 10 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "0 4 11 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "0 4 12 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "0 5 9 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "0 5 11 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "0 6 9 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "0 6 11 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "0 7 9 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "0 7 11 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "0 8 9 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "0 8 11 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "0 9 10 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "0 9 11 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "0 9 12 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "0 10 11 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "0 11 12 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "1 3 8 12\n",
      "0.5277777777777778\n",
      "0.6599999999999999\n",
      "1 3 9 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "1 3 11 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "1 5 11 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "1 8 9 12\n",
      "0.5277777777777778\n",
      "0.6599999999999999\n",
      "1 8 11 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "1 11 12 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "2 3 4 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "2 4 5 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "2 4 6 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "2 4 7 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "2 4 8 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "2 4 9 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "2 4 10 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "2 4 11 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "2 4 12 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "2 5 9 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "2 5 11 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "2 7 9 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "2 7 11 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "2 8 9 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "2 8 11 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "2 9 10 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "2 9 12 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "2 10 11 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "2 11 12 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "3 4 5 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "3 4 6 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "3 4 7 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "3 4 8 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "3 4 9 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "3 4 10 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "3 4 11 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "3 4 12 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "3 5 8 12\n",
      "0.5277777777777778\n",
      "0.6599999999999999\n",
      "3 5 9 12\n",
      "0.5277777777777778\n",
      "0.6611295681063123\n",
      "3 5 11 12\n",
      "0.5300925925925926\n",
      "0.6622296173044925\n",
      "3 8 9 12\n",
      "0.5300925925925926\n",
      "0.6622296173044925\n",
      "3 8 11 12\n",
      "0.5324074074074074\n",
      "0.6633333333333333\n",
      "3 11 12 12\n",
      "0.5324074074074074\n",
      "0.6633333333333333\n",
      "5 8 12 12\n",
      "0.5324074074074074\n",
      "0.6610738255033558\n",
      "5 11 12 12\n",
      "0.5324074074074074\n",
      "0.6633333333333333\n",
      "8 11 12 12\n",
      "0.5324074074074074\n",
      "0.6633333333333333\n"
     ]
    }
   ],
   "source": [
    "tlist=[t1,t2,t3,t4,t5,t6,t7,t11,t22,t33,t44,t55,t66]\n",
    "flist=[f1,f2,f3,f4,f5,f6,f7,f11,f22,f33,f44,f55,f66]\n",
    "#      0   1  2  3  4  5  6  7   8   9   10  11  12\n",
    "minV=0\n",
    "for kk in range(13-2):\n",
    "    for jj in range(kk+1,13-1):\n",
    "        for ii in range(jj+1,13-0):\n",
    "            #for i in range(ii+1,13-1):\n",
    "                #for j in range(i+1,13):\n",
    "                     #for k in range(j+1,13):\n",
    "                        #for m in range(k+1,13):\n",
    "                            #for n in range(m+1,13):\n",
    "                                #for q in range(n+1,13):\n",
    "                                    #for w in range(q+1,13):\n",
    "                                        #for z in range(w+1,13):\n",
    "                                            #for y in range(z+1,13):\n",
    "                                                a=tlist[kk]+tlist[jj]+tlist[ii]#+tlist[i]+tlist[j]#+tlist[k]+tlist[m]+tlist[n]+tlist[q]+tlist[w]+tlist[z]+tlist[y]+tlist[p] \n",
    "                                                a=list(set(a))\n",
    "                                                aa=len(a)\n",
    "                                                a=flist[kk]+flist[jj]+flist[ii]#+flist[i]+flist[j]#+flist[k]+flist[m]+flist[n]+flist[q]+flist[w]+flist[z]+flist[y]+flist[p] \n",
    "                                                a=list(set(a))\n",
    "                                                bb=len(a)\n",
    "                                                TN=(num-bb)/(num*2)\n",
    "                                                TP=aa/(num*2)\n",
    "                                                FN=(num-aa)/(num*2)\n",
    "                                                FP=bb/(num*2)\n",
    "                                                F1=2*TP/(2*TP+FP+FN)\n",
    "                                                if TN+TP>=minV:\n",
    "                                                    print(kk,jj,ii,i)#,j,k,m,n,q,w,z,y,p)\n",
    "                                                    print(TN+TP)\n",
    "                                                    print(F1)\n",
    "                                                    minV=TN+TP"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 288,
   "id": "33e916ce",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "432\n",
      "10.86111111111111\n",
      "11.717592592592593\n"
     ]
    }
   ],
   "source": [
    "with open('llama_data/pubhealth+0.1/dataset_0.1.jsonl') as file1: \n",
    "    fnum=[]\n",
    "    tnum=[]\n",
    "    num=0\n",
    "    for line1 in file1:\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        ref1=ref1[:-8]\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        num+=1\n",
    "        if 'true' in label: # factual\n",
    "            num2=0\n",
    "            for t in tlist:\n",
    "                if ref1 in t:\n",
    "                    num2+=1\n",
    "            tnum.append(num2)\n",
    "            #print(num,num2) \n",
    "        else:\n",
    "            num1=0\n",
    "            for f in flist:\n",
    "                if ref1 in f:\n",
    "                    num1+=1\n",
    "            fnum.append(num1)\n",
    "            #print(num,num1)\n",
    "    print(num)\n",
    "    print(np.mean(fnum))\n",
    "    print(np.mean(tnum))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "id": "1f6ec717",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "198\n",
      "0.9166666666666666\n"
     ]
    }
   ],
   "source": [
    "a=t7+t22\n",
    "res=set(a)\n",
    "a=list(res)\n",
    "print(len(a))\n",
    "aa=len(a)\n",
    "print(len(a)/num)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "id": "bdc9b7e6",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "186\n",
      "0.1388888888888889\n"
     ]
    }
   ],
   "source": [
    "a=f5+f6\n",
    "res=set(a)\n",
    "a=list(res)\n",
    "print(len(a))\n",
    "bb=len(a)\n",
    "print((num-len(a))/num)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "id": "58b833ad",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "FN 0.041666666666666664\n",
      "FP 0.4305555555555556\n",
      "TN 0.06944444444444445\n",
      "TP 0.4583333333333333\n"
     ]
    }
   ],
   "source": [
    "all_num=num*2\n",
    "# FN:\n",
    "FN=(num-aa)/all_num\n",
    "print('FN',FN)\n",
    "# FP:\n",
    "FP=bb/all_num\n",
    "print('FP',FP)\n",
    "# TN:\n",
    "TN=(num-bb)/all_num\n",
    "print('TN',TN)\n",
    "# TP:\n",
    "TP=aa/all_num\n",
    "print('TP',TP)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "id": "e5172d40",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.5277777777777778\n",
      "0.515625\n",
      "0.9166666666666666\n",
      "0.6599999999999999\n"
     ]
    }
   ],
   "source": [
    "Accuracy=TN+TP\n",
    "print(Accuracy)\n",
    "Precision=TP/(TP+FP)\n",
    "print(Precision)\n",
    "Recall=TP/(TP+FN)\n",
    "print(Recall)\n",
    "F1=2*TP/(2*TP+FP+FN)\n",
    "print(F1)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f3ef7b0d",
   "metadata": {},
   "source": [
    "# pubhealth+0.5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 191,
   "id": "88286abc",
   "metadata": {},
   "outputs": [],
   "source": [
    "num=221"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 192,
   "id": "35a402e1",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "214 0.9683257918552036\n",
      "4 0.01809954751131222\n",
      "matched num= 442\n"
     ]
    }
   ],
   "source": [
    "t=0\n",
    "f=0\n",
    "t1=[]\n",
    "f1=[]\n",
    "n=0\n",
    "with open(\"llama_data/pubhealth+0.5/detections_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        with open(\"llama_data/pubhealth+0.5/dataset_0.5.jsonl\") as file1:\n",
    "            for line1 in file1:\n",
    "                label=line1.split(\"label: \")[-1]\n",
    "                ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "                if len(ref1)==3:\n",
    "                    ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "                else:\n",
    "                    ref1=ref1[-1]\n",
    "                ref1=ref1[:-8]\n",
    "                if ref.strip()==ref1.strip():\n",
    "                    flag=True\n",
    "                    n+=1\n",
    "                    if 'true' in label: # hallucination\n",
    "                        if \"here are no conflicting parts\" in answer: \n",
    "                            t+=1\n",
    "                        else:    \n",
    "                            t1.append(ref1)\n",
    "                    else: # factual\n",
    "                        if \"here are no conflicting parts\" in answer: \n",
    "                            f+=1\n",
    "                        else:    \n",
    "                            f1.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\".\")\n",
    "                    c2=ref1.strip().split(\".\")\n",
    "                    if len(c1)>2 and len(c2)>2 and c1[-2]==c2[-2]:\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t1.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f1.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\"\\\\\\\\\")[0]\n",
    "                    c2=ref1.strip().split(\"\\\\\\\\\")[0]\n",
    "                    if c1==c2:\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t1.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f1.append(ref1)\n",
    "                if flag==False:\n",
    "                    uni=ref.strip().split('\\\\u')[-1][4:]\n",
    "                    length=len(uni)\n",
    "                    if ref1.strip()[-length:] == uni:\n",
    "                        flag = True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t1.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f1.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            print(ref1)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 193,
   "id": "b54170ce",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "214 0.9683257918552036\n",
      "5 0.02262443438914027\n",
      "matched num= 442\n"
     ]
    }
   ],
   "source": [
    "# Score detections_1_1.jsonl against the pubhealth+0.5 dataset.\n",
    "# t / f count matched samples labelled 'true' (hallucination) / otherwise (factual)\n",
    "# whose answer contains the phrase 'here are no conflicting parts';\n",
    "# t11 / f11 collect the dataset references of the remaining matched samples.\n",
    "# NOTE: num is not assigned in this cell and must come from an earlier cell.\n",
    "t=0\n",
    "f=0\n",
    "t11=[]\n",
    "f11=[]\n",
    "n=0\n",
    "# Parse the dataset once; it does not change between detection lines.\n",
    "dataset=[]\n",
    "with open(\"llama_data/pubhealth+0.5/dataset_0.5.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        parts=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(parts)==3:\n",
    "            # the marker occurs twice, so glue the two trailing pieces back together\n",
    "            ref1=parts[1]+\"Reference: \"+parts[2]\n",
    "        else:\n",
    "            ref1=parts[-1]\n",
    "        dataset.append((ref1[:-8],label))\n",
    "with open(\"llama_data/pubhealth+0.5/detections_1_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        accepted=\"here are no conflicting parts\" in answer\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for ref1,label in dataset:\n",
    "            # an exact match is always counted, even after an earlier row matched\n",
    "            hit=ref.strip()==ref1.strip()\n",
    "            if not hit and not flag:\n",
    "                # fallbacks are tried only while nothing has matched this detection yet\n",
    "                # 1) same second-to-last '.'-separated segment\n",
    "                c1=ref.strip().split(\".\")\n",
    "                c2=ref1.strip().split(\".\")\n",
    "                hit=len(c1)>2 and len(c2)>2 and c1[-2]==c2[-2]\n",
    "                if not hit:\n",
    "                    # 2) same text before the first double backslash\n",
    "                    hit=ref.strip().split(\"\\\\\\\\\")[0]==ref1.strip().split(\"\\\\\\\\\")[0]\n",
    "                if not hit:\n",
    "                    # 3) dataset reference ends with the text that follows the last\n",
    "                    #    backslash-u marker of the detection reference (4 chars skipped)\n",
    "                    uni=ref.strip().split('\\\\u')[-1][4:]\n",
    "                    hit=ref1.strip()[-len(uni):]==uni\n",
    "            if hit:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                if 'true' in label: # hallucination\n",
    "                    if accepted:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t11.append(ref1)\n",
    "                else: # factual\n",
    "                    if accepted:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f11.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # show the detection reference that matched zero or several dataset rows\n",
    "            # (ref1 would only ever be the last dataset row here)\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 194,
   "id": "98f1b4c0",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "211 0.9547511312217195\n",
      "4 0.01809954751131222\n",
      "matched num= 442\n"
     ]
    }
   ],
   "source": [
    "# Score detections_2.jsonl against the pubhealth+0.5 dataset.\n",
    "# t / f count matched samples labelled 'true' (hallucination) / otherwise (factual)\n",
    "# whose answer contains the phrase 'here are no conflicting parts';\n",
    "# t2 / f2 collect the dataset references of the remaining matched samples.\n",
    "# NOTE: num is not assigned in this cell and must come from an earlier cell.\n",
    "t=0\n",
    "f=0\n",
    "t2=[]\n",
    "f2=[]\n",
    "n=0\n",
    "# Parse the dataset once; it does not change between detection lines.\n",
    "dataset=[]\n",
    "with open(\"llama_data/pubhealth+0.5/dataset_0.5.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        parts=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(parts)==3:\n",
    "            # the marker occurs twice, so glue the two trailing pieces back together\n",
    "            ref1=parts[1]+\"Reference: \"+parts[2]\n",
    "        else:\n",
    "            ref1=parts[-1]\n",
    "        dataset.append((ref1[:-8],label))\n",
    "with open(\"llama_data/pubhealth+0.5/detections_2.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        accepted=\"here are no conflicting parts\" in answer\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for ref1,label in dataset:\n",
    "            # an exact match is always counted, even after an earlier row matched\n",
    "            hit=ref.strip()==ref1.strip()\n",
    "            stop=False\n",
    "            if not hit and not flag:\n",
    "                # fallbacks are tried only while nothing has matched this detection yet\n",
    "                # 1) same second-to-last '.'-separated segment\n",
    "                c1=ref.strip().split(\".\")\n",
    "                c2=ref1.strip().split(\".\")\n",
    "                hit=len(c1)>2 and len(c2)>2 and c1[-2]==c2[-2]\n",
    "                # NOTE(review): only this fallback ends the scan early in this cell;\n",
    "                # confirm whether the early exit is intended\n",
    "                stop=hit\n",
    "                if not hit:\n",
    "                    # 2) same text before the first double backslash\n",
    "                    hit=ref.strip().split(\"\\\\\\\\\")[0]==ref1.strip().split(\"\\\\\\\\\")[0]\n",
    "                if not hit:\n",
    "                    # 3) dataset reference ends with the text that follows the last\n",
    "                    #    backslash-u marker of the detection reference (4 chars skipped)\n",
    "                    uni=ref.strip().split('\\\\u')[-1][4:]\n",
    "                    hit=ref1.strip()[-len(uni):]==uni\n",
    "            if hit:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                if 'true' in label: # hallucination\n",
    "                    if accepted:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t2.append(ref1)\n",
    "                else: # factual\n",
    "                    if accepted:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f2.append(ref1)\n",
    "            if stop:\n",
    "                break\n",
    "        if n-pre!=1:\n",
    "            # show the detection reference that matched zero or several dataset rows\n",
    "            # (ref1 would only ever be the last dataset row here)\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 195,
   "id": "c079022b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "204 0.9230769230769231\n",
      "12 0.05429864253393665\n",
      "matched num= 442\n"
     ]
    }
   ],
   "source": [
    "# Score detections_2_1.jsonl against the pubhealth+0.5 dataset.\n",
    "# t / f count matched samples labelled 'true' (hallucination) / otherwise (factual)\n",
    "# whose answer contains the phrase 'here are no conflicting parts';\n",
    "# t22 / f22 collect the dataset references of the remaining matched samples.\n",
    "# NOTE: num is not assigned in this cell and must come from an earlier cell.\n",
    "t=0\n",
    "f=0\n",
    "t22=[]\n",
    "f22=[]\n",
    "n=0\n",
    "# Parse the dataset once; it does not change between detection lines.\n",
    "dataset=[]\n",
    "with open(\"llama_data/pubhealth+0.5/dataset_0.5.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        parts=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(parts)==3:\n",
    "            # the marker occurs twice, so glue the two trailing pieces back together\n",
    "            ref1=parts[1]+\"Reference: \"+parts[2]\n",
    "        else:\n",
    "            ref1=parts[-1]\n",
    "        dataset.append((ref1[:-8],label))\n",
    "with open(\"llama_data/pubhealth+0.5/detections_2_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        accepted=\"here are no conflicting parts\" in answer\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for ref1,label in dataset:\n",
    "            # an exact match is always counted, even after an earlier row matched\n",
    "            hit=ref.strip()==ref1.strip()\n",
    "            if not hit and not flag:\n",
    "                # fallbacks are tried only while nothing has matched this detection yet\n",
    "                # 1) same second-to-last '.'-separated segment\n",
    "                c1=ref.strip().split(\".\")\n",
    "                c2=ref1.strip().split(\".\")\n",
    "                hit=len(c1)>2 and len(c2)>2 and c1[-2]==c2[-2]\n",
    "                if not hit:\n",
    "                    # 2) same text before the first double backslash\n",
    "                    hit=ref.strip().split(\"\\\\\\\\\")[0]==ref1.strip().split(\"\\\\\\\\\")[0]\n",
    "                if not hit:\n",
    "                    # 3) dataset reference ends with the text that follows the last\n",
    "                    #    backslash-u marker of the detection reference (4 chars skipped)\n",
    "                    uni=ref.strip().split('\\\\u')[-1][4:]\n",
    "                    hit=ref1.strip()[-len(uni):]==uni\n",
    "            if hit:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                if 'true' in label: # hallucination\n",
    "                    if accepted:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t22.append(ref1)\n",
    "                else: # factual\n",
    "                    if accepted:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f22.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # show the detection reference that matched zero or several dataset rows\n",
    "            # (ref1 would only ever be the last dataset row here)\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 196,
   "id": "72105dd5",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "215 0.9728506787330317\n",
      "3 0.013574660633484163\n",
      "matched num= 442\n"
     ]
    }
   ],
   "source": [
    "# Score detections_3.jsonl against the pubhealth+0.5 dataset.\n",
    "# t / f count matched samples labelled 'true' (hallucination) / otherwise (factual)\n",
    "# whose answer contains the phrase 'here are no conflicting parts';\n",
    "# t3 / f3 collect the dataset references of the remaining matched samples.\n",
    "# NOTE: num is not assigned in this cell and must come from an earlier cell.\n",
    "t=0\n",
    "f=0\n",
    "t3=[]\n",
    "f3=[]\n",
    "n=0\n",
    "# Parse the dataset once; it does not change between detection lines.\n",
    "dataset=[]\n",
    "with open(\"llama_data/pubhealth+0.5/dataset_0.5.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        parts=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(parts)==3:\n",
    "            # the marker occurs twice, so glue the two trailing pieces back together\n",
    "            ref1=parts[1]+\"Reference: \"+parts[2]\n",
    "        else:\n",
    "            ref1=parts[-1]\n",
    "        dataset.append((ref1[:-8],label))\n",
    "with open(\"llama_data/pubhealth+0.5/detections_3.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        accepted=\"here are no conflicting parts\" in answer\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for ref1,label in dataset:\n",
    "            # an exact match is always counted, even after an earlier row matched\n",
    "            hit=ref.strip()==ref1.strip()\n",
    "            if not hit and not flag:\n",
    "                # fallbacks are tried only while nothing has matched this detection yet\n",
    "                # 1) same second-to-last '.'-separated segment\n",
    "                c1=ref.strip().split(\".\")\n",
    "                c2=ref1.strip().split(\".\")\n",
    "                hit=len(c1)>2 and len(c2)>2 and c1[-2]==c2[-2]\n",
    "                if not hit:\n",
    "                    # 2) same text before the first double backslash\n",
    "                    hit=ref.strip().split(\"\\\\\\\\\")[0]==ref1.strip().split(\"\\\\\\\\\")[0]\n",
    "                if not hit:\n",
    "                    # 3) dataset reference ends with the text that follows the last\n",
    "                    #    backslash-u marker of the detection reference (4 chars skipped)\n",
    "                    uni=ref.strip().split('\\\\u')[-1][4:]\n",
    "                    hit=ref1.strip()[-len(uni):]==uni\n",
    "            if hit:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                if 'true' in label: # hallucination\n",
    "                    if accepted:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t3.append(ref1)\n",
    "                else: # factual\n",
    "                    if accepted:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f3.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # show the detection reference that matched zero or several dataset rows\n",
    "            # (ref1 would only ever be the last dataset row here)\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 197,
   "id": "93babb4b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "212 0.9592760180995475\n",
      "7 0.03167420814479638\n",
      "matched num= 442\n"
     ]
    }
   ],
   "source": [
    "# Score detections_3_1.jsonl against the pubhealth+0.5 dataset.\n",
    "# t / f count matched samples labelled 'true' (hallucination) / otherwise (factual)\n",
    "# whose answer contains the phrase 'here are no conflicting parts';\n",
    "# t33 / f33 collect the dataset references of the remaining matched samples.\n",
    "# NOTE: num is not assigned in this cell and must come from an earlier cell.\n",
    "t=0\n",
    "f=0\n",
    "t33=[]\n",
    "f33=[]\n",
    "n=0\n",
    "# Parse the dataset once; it does not change between detection lines.\n",
    "dataset=[]\n",
    "with open(\"llama_data/pubhealth+0.5/dataset_0.5.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        parts=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(parts)==3:\n",
    "            # the marker occurs twice, so glue the two trailing pieces back together\n",
    "            ref1=parts[1]+\"Reference: \"+parts[2]\n",
    "        else:\n",
    "            ref1=parts[-1]\n",
    "        dataset.append((ref1[:-8],label))\n",
    "with open(\"llama_data/pubhealth+0.5/detections_3_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        accepted=\"here are no conflicting parts\" in answer\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for ref1,label in dataset:\n",
    "            # an exact match is always counted, even after an earlier row matched\n",
    "            hit=ref.strip()==ref1.strip()\n",
    "            if not hit and not flag:\n",
    "                # fallbacks are tried only while nothing has matched this detection yet\n",
    "                # 1) same second-to-last '.'-separated segment\n",
    "                c1=ref.strip().split(\".\")\n",
    "                c2=ref1.strip().split(\".\")\n",
    "                hit=len(c1)>2 and len(c2)>2 and c1[-2]==c2[-2]\n",
    "                if not hit:\n",
    "                    # 2) same text before the first double backslash\n",
    "                    hit=ref.strip().split(\"\\\\\\\\\")[0]==ref1.strip().split(\"\\\\\\\\\")[0]\n",
    "                if not hit:\n",
    "                    # 3) dataset reference ends with the text that follows the last\n",
    "                    #    backslash-u marker of the detection reference (4 chars skipped)\n",
    "                    uni=ref.strip().split('\\\\u')[-1][4:]\n",
    "                    hit=ref1.strip()[-len(uni):]==uni\n",
    "            if hit:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                if 'true' in label: # hallucination\n",
    "                    if accepted:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t33.append(ref1)\n",
    "                else: # factual\n",
    "                    if accepted:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f33.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # show the detection reference that matched zero or several dataset rows\n",
    "            # (ref1 would only ever be the last dataset row here)\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 198,
   "id": "bde2deda",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "204 0.9230769230769231\n",
      "7 0.03167420814479638\n",
      "matched num= 442\n"
     ]
    }
   ],
   "source": [
    "# Score detections_4.jsonl against the pubhealth+0.5 dataset.\n",
    "# t / f count matched samples labelled 'true' (hallucination) / otherwise (factual)\n",
    "# whose answer contains the phrase 'here are no conflicting parts';\n",
    "# t4 / f4 collect the dataset references of the remaining matched samples.\n",
    "# NOTE: num is not assigned in this cell and must come from an earlier cell.\n",
    "t=0\n",
    "f=0\n",
    "t4=[]\n",
    "f4=[]\n",
    "n=0\n",
    "# Parse the dataset once; it does not change between detection lines.\n",
    "dataset=[]\n",
    "with open(\"llama_data/pubhealth+0.5/dataset_0.5.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        parts=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(parts)==3:\n",
    "            # the marker occurs twice, so glue the two trailing pieces back together\n",
    "            ref1=parts[1]+\"Reference: \"+parts[2]\n",
    "        else:\n",
    "            ref1=parts[-1]\n",
    "        dataset.append((ref1[:-8],label))\n",
    "with open(\"llama_data/pubhealth+0.5/detections_4.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        accepted=\"here are no conflicting parts\" in answer\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for ref1,label in dataset:\n",
    "            # an exact match is always counted, even after an earlier row matched\n",
    "            hit=ref.strip()==ref1.strip()\n",
    "            stop=False\n",
    "            if not hit and not flag:\n",
    "                # fallbacks are tried only while nothing has matched this detection yet\n",
    "                # 1) same second-to-last '.'-separated segment\n",
    "                c1=ref.strip().split(\".\")\n",
    "                c2=ref1.strip().split(\".\")\n",
    "                hit=len(c1)>2 and len(c2)>2 and c1[-2]==c2[-2]\n",
    "                # NOTE(review): only this fallback ends the scan early in this cell;\n",
    "                # confirm whether the early exit is intended\n",
    "                stop=hit\n",
    "                if not hit:\n",
    "                    # 2) same text before the first double backslash\n",
    "                    hit=ref.strip().split(\"\\\\\\\\\")[0]==ref1.strip().split(\"\\\\\\\\\")[0]\n",
    "                if not hit:\n",
    "                    # 3) dataset reference ends with the text that follows the last\n",
    "                    #    backslash-u marker of the detection reference (4 chars skipped)\n",
    "                    uni=ref.strip().split('\\\\u')[-1][4:]\n",
    "                    hit=ref1.strip()[-len(uni):]==uni\n",
    "            if hit:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                if 'true' in label: # hallucination\n",
    "                    if accepted:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t4.append(ref1)\n",
    "                else: # factual\n",
    "                    if accepted:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f4.append(ref1)\n",
    "            if stop:\n",
    "                break\n",
    "        if n-pre!=1:\n",
    "            # show the detection reference that matched zero or several dataset rows\n",
    "            # (ref1 would only ever be the last dataset row here)\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 199,
   "id": "a8a7ad95",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "214 0.9683257918552036\n",
      "5 0.02262443438914027\n",
      "matched num= 442\n"
     ]
    }
   ],
   "source": [
    "# Score detections_4_1.jsonl against the pubhealth+0.5 dataset.\n",
    "# t / f count matched samples labelled 'true' (hallucination) / otherwise (factual)\n",
    "# whose answer contains the phrase 'here are no conflicting parts';\n",
    "# t44 / f44 collect the dataset references of the remaining matched samples.\n",
    "# NOTE: num is not assigned in this cell and must come from an earlier cell.\n",
    "t=0\n",
    "f=0\n",
    "t44=[]\n",
    "f44=[]\n",
    "n=0\n",
    "# Parse the dataset once; it does not change between detection lines.\n",
    "dataset=[]\n",
    "with open(\"llama_data/pubhealth+0.5/dataset_0.5.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        parts=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(parts)==3:\n",
    "            # the marker occurs twice, so glue the two trailing pieces back together\n",
    "            ref1=parts[1]+\"Reference: \"+parts[2]\n",
    "        else:\n",
    "            ref1=parts[-1]\n",
    "        dataset.append((ref1[:-8],label))\n",
    "with open(\"llama_data/pubhealth+0.5/detections_4_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        accepted=\"here are no conflicting parts\" in answer\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for ref1,label in dataset:\n",
    "            # an exact match is always counted, even after an earlier row matched\n",
    "            hit=ref.strip()==ref1.strip()\n",
    "            if not hit and not flag:\n",
    "                # fallbacks are tried only while nothing has matched this detection yet\n",
    "                # 1) same second-to-last '.'-separated segment\n",
    "                c1=ref.strip().split(\".\")\n",
    "                c2=ref1.strip().split(\".\")\n",
    "                hit=len(c1)>2 and len(c2)>2 and c1[-2]==c2[-2]\n",
    "                if not hit:\n",
    "                    # 2) same text before the first double backslash\n",
    "                    hit=ref.strip().split(\"\\\\\\\\\")[0]==ref1.strip().split(\"\\\\\\\\\")[0]\n",
    "                if not hit:\n",
    "                    # 3) dataset reference ends with the text that follows the last\n",
    "                    #    backslash-u marker of the detection reference (4 chars skipped)\n",
    "                    uni=ref.strip().split('\\\\u')[-1][4:]\n",
    "                    hit=ref1.strip()[-len(uni):]==uni\n",
    "            if hit:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                if 'true' in label: # hallucination\n",
    "                    if accepted:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t44.append(ref1)\n",
    "                else: # factual\n",
    "                    if accepted:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f44.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # show the detection reference that matched zero or several dataset rows\n",
    "            # (ref1 would only ever be the last dataset row here)\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 200,
   "id": "483a0355",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "218 0.9864253393665159\n",
      "6 0.027149321266968326\n",
      "matched num= 442\n"
     ]
    }
   ],
   "source": [
    "# Score detections_5.jsonl against the labels in pubhealth+0.5/dataset_0.5.jsonl.\n",
    "# `num` is not defined in this cell; it must already exist in the kernel (set by an earlier cell).\n",
    "t=0    # label-'true' rows whose answer contains the 'no conflicting parts' phrase\n",
    "f=0    # remaining rows whose answer contains the 'no conflicting parts' phrase\n",
    "t5=[]  # label-'true' references whose answer does not contain that phrase\n",
    "f5=[]  # remaining references whose answer does not contain that phrase\n",
    "n=0    # total number of (detection, dataset row) matches\n",
    "\n",
    "# Parse the dataset once up front instead of re-reading the file for every detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/pubhealth+0.5/dataset_0.5.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # the reference text itself contained 'Reference: '; stitch the two halves back together\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        # drop the 8 trailing characters (presumably the separator before '> Assistant: ' - confirm)\n",
    "        dataset.append((label,ref1[:-8]))\n",
    "\n",
    "with open(\"llama_data/pubhealth+0.5/detections_5.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        # text between 'P1: ' and 'P2: ', minus its last 3 characters\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        key=ref.strip()\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for label,ref1 in dataset:\n",
    "            key1=ref1.strip()\n",
    "            # An exact match always counts; the looser fallbacks are only tried\n",
    "            # while this detection has not been matched to any dataset row yet.\n",
    "            matched=key==key1\n",
    "            if not matched and not flag:\n",
    "                # fallback 1: same second-to-last '.'-separated segment\n",
    "                c1=key.split(\".\")\n",
    "                c2=key1.split(\".\")\n",
    "                matched=len(c1)>2 and len(c2)>2 and c1[-2]==c2[-2]\n",
    "                if not matched:\n",
    "                    # fallback 2: same text before the first pair of backslashes\n",
    "                    matched=key.split(\"\\\\\\\\\")[0]==key1.split(\"\\\\\\\\\")[0]\n",
    "                if not matched:\n",
    "                    # fallback 3: dataset row ends with the detection's tail, taken after the\n",
    "                    # last backslash-u marker and the 4 characters that follow it\n",
    "                    uni=key.split('\\\\u')[-1][4:]\n",
    "                    matched=key1[-len(uni):]==uni\n",
    "            if not matched:\n",
    "                continue\n",
    "            flag=True\n",
    "            n+=1\n",
    "            no_conflict=\"here are no conflicting parts\" in answer\n",
    "            if 'true' in label: # hallucination\n",
    "                if no_conflict:\n",
    "                    t+=1\n",
    "                else:\n",
    "                    t5.append(ref1)\n",
    "            else: # factual\n",
    "                if no_conflict:\n",
    "                    f+=1\n",
    "                else:\n",
    "                    f5.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # report the detection reference that did not match exactly one dataset row\n",
    "            print(n-pre)\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 201,
   "id": "e7e9692c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "217 0.9819004524886877\n",
      "11 0.049773755656108594\n",
      "matched num= 442\n"
     ]
    }
   ],
   "source": [
    "# Score detections_5_1.jsonl against the labels in pubhealth+0.5/dataset_0.5.jsonl.\n",
    "# `num` is not defined in this cell; it must already exist in the kernel (set by an earlier cell).\n",
    "t=0     # label-'true' rows whose answer contains the 'no conflicting parts' phrase\n",
    "f=0     # remaining rows whose answer contains the 'no conflicting parts' phrase\n",
    "t55=[]  # label-'true' references whose answer does not contain that phrase\n",
    "f55=[]  # remaining references whose answer does not contain that phrase\n",
    "n=0     # total number of (detection, dataset row) matches\n",
    "\n",
    "# Parse the dataset once up front instead of re-reading the file for every detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/pubhealth+0.5/dataset_0.5.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # the reference text itself contained 'Reference: '; stitch the two halves back together\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        # drop the 8 trailing characters (presumably the separator before '> Assistant: ' - confirm)\n",
    "        dataset.append((label,ref1[:-8]))\n",
    "\n",
    "with open(\"llama_data/pubhealth+0.5/detections_5_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        # text between 'P1: ' and 'P2: ', minus its last 3 characters\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        key=ref.strip()\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for label,ref1 in dataset:\n",
    "            key1=ref1.strip()\n",
    "            # An exact match always counts; the looser fallbacks are only tried\n",
    "            # while this detection has not been matched to any dataset row yet.\n",
    "            matched=key==key1\n",
    "            if not matched and not flag:\n",
    "                # fallback 1: same second-to-last '.'-separated segment\n",
    "                c1=key.split(\".\")\n",
    "                c2=key1.split(\".\")\n",
    "                matched=len(c1)>2 and len(c2)>2 and c1[-2]==c2[-2]\n",
    "                if not matched:\n",
    "                    # fallback 2: same text before the first pair of backslashes\n",
    "                    matched=key.split(\"\\\\\\\\\")[0]==key1.split(\"\\\\\\\\\")[0]\n",
    "                if not matched:\n",
    "                    # fallback 3: dataset row ends with the detection's tail, taken after the\n",
    "                    # last backslash-u marker and the 4 characters that follow it\n",
    "                    uni=key.split('\\\\u')[-1][4:]\n",
    "                    matched=key1[-len(uni):]==uni\n",
    "            if not matched:\n",
    "                continue\n",
    "            flag=True\n",
    "            n+=1\n",
    "            no_conflict=\"here are no conflicting parts\" in answer\n",
    "            if 'true' in label: # hallucination\n",
    "                if no_conflict:\n",
    "                    t+=1\n",
    "                else:\n",
    "                    t55.append(ref1)\n",
    "            else: # factual\n",
    "                if no_conflict:\n",
    "                    f+=1\n",
    "                else:\n",
    "                    f55.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # report the detection reference that did not match exactly one dataset row\n",
    "            print(n-pre)\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 202,
   "id": "61953005",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "207 0.9366515837104072\n",
      "14 0.06334841628959276\n",
      "matched num= 442\n"
     ]
    }
   ],
   "source": [
    "# Score detections_6.jsonl against the labels in pubhealth+0.5/dataset_0.5.jsonl.\n",
    "# `num` is not defined in this cell; it must already exist in the kernel (set by an earlier cell).\n",
    "t=0    # label-'true' rows whose answer contains the 'no conflicting parts' phrase\n",
    "f=0    # remaining rows whose answer contains the 'no conflicting parts' phrase\n",
    "t6=[]  # label-'true' references whose answer does not contain that phrase\n",
    "f6=[]  # remaining references whose answer does not contain that phrase\n",
    "n=0    # total number of (detection, dataset row) matches\n",
    "\n",
    "# Parse the dataset once up front instead of re-reading the file for every detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/pubhealth+0.5/dataset_0.5.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # the reference text itself contained 'Reference: '; stitch the two halves back together\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        # drop the 8 trailing characters (presumably the separator before '> Assistant: ' - confirm)\n",
    "        dataset.append((label,ref1[:-8]))\n",
    "\n",
    "with open(\"llama_data/pubhealth+0.5/detections_6.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        # text between 'P1: ' and 'P2: ', minus its last 3 characters\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        key=ref.strip()\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for label,ref1 in dataset:\n",
    "            key1=ref1.strip()\n",
    "            # An exact match always counts; the looser fallbacks are only tried\n",
    "            # while this detection has not been matched to any dataset row yet.\n",
    "            matched=key==key1\n",
    "            if not matched and not flag:\n",
    "                # fallback 1: same second-to-last '.'-separated segment\n",
    "                c1=key.split(\".\")\n",
    "                c2=key1.split(\".\")\n",
    "                matched=len(c1)>2 and len(c2)>2 and c1[-2]==c2[-2]\n",
    "                if not matched:\n",
    "                    # fallback 2: same text before the first pair of backslashes\n",
    "                    matched=key.split(\"\\\\\\\\\")[0]==key1.split(\"\\\\\\\\\")[0]\n",
    "                if not matched:\n",
    "                    # fallback 3: dataset row ends with the detection's tail, taken after the\n",
    "                    # last backslash-u marker and the 4 characters that follow it\n",
    "                    uni=key.split('\\\\u')[-1][4:]\n",
    "                    matched=key1[-len(uni):]==uni\n",
    "            if not matched:\n",
    "                continue\n",
    "            flag=True\n",
    "            n+=1\n",
    "            no_conflict=\"here are no conflicting parts\" in answer\n",
    "            if 'true' in label: # hallucination\n",
    "                if no_conflict:\n",
    "                    t+=1\n",
    "                else:\n",
    "                    t6.append(ref1)\n",
    "            else: # factual\n",
    "                if no_conflict:\n",
    "                    f+=1\n",
    "                else:\n",
    "                    f6.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # report the detection reference that did not match exactly one dataset row\n",
    "            print(n-pre)\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 203,
   "id": "74afd526",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "214 0.9683257918552036\n",
      "13 0.058823529411764705\n",
      "matched num= 442\n"
     ]
    }
   ],
   "source": [
    "# Score detections_6_1.jsonl against the labels in pubhealth+0.5/dataset_0.5.jsonl.\n",
    "# `num` is not defined in this cell; it must already exist in the kernel (set by an earlier cell).\n",
    "t=0     # label-'true' rows whose answer contains the 'no conflicting parts' phrase\n",
    "f=0     # remaining rows whose answer contains the 'no conflicting parts' phrase\n",
    "t66=[]  # label-'true' references whose answer does not contain that phrase\n",
    "f66=[]  # remaining references whose answer does not contain that phrase\n",
    "n=0     # total number of (detection, dataset row) matches\n",
    "\n",
    "# Parse the dataset once up front instead of re-reading the file for every detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/pubhealth+0.5/dataset_0.5.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # the reference text itself contained 'Reference: '; stitch the two halves back together\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        # drop the 8 trailing characters (presumably the separator before '> Assistant: ' - confirm)\n",
    "        dataset.append((label,ref1[:-8]))\n",
    "\n",
    "with open(\"llama_data/pubhealth+0.5/detections_6_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        # text between 'P1: ' and 'P2: ', minus its last 3 characters\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        key=ref.strip()\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for label,ref1 in dataset:\n",
    "            key1=ref1.strip()\n",
    "            # An exact match always counts; the looser fallbacks are only tried\n",
    "            # while this detection has not been matched to any dataset row yet.\n",
    "            matched=key==key1\n",
    "            if not matched and not flag:\n",
    "                # fallback 1: same second-to-last '.'-separated segment\n",
    "                c1=key.split(\".\")\n",
    "                c2=key1.split(\".\")\n",
    "                matched=len(c1)>2 and len(c2)>2 and c1[-2]==c2[-2]\n",
    "                if not matched:\n",
    "                    # fallback 2: both contain a pair of backslashes and share the text after the last one\n",
    "                    c1=key.split(\"\\\\\\\\\")\n",
    "                    c2=key1.split(\"\\\\\\\\\")\n",
    "                    matched=len(c1)>1 and len(c2)>1 and c1[-1]==c2[-1]\n",
    "                if not matched:\n",
    "                    # fallback 3: dataset row ends with the detection's tail, taken after the\n",
    "                    # last backslash-u marker and the 4 characters that follow it\n",
    "                    uni=key.split('\\\\u')[-1][4:]\n",
    "                    matched=key1[-len(uni):]==uni\n",
    "            if not matched:\n",
    "                continue\n",
    "            flag=True\n",
    "            n+=1\n",
    "            no_conflict=\"here are no conflicting parts\" in answer\n",
    "            if 'true' in label: # hallucination\n",
    "                if no_conflict:\n",
    "                    t+=1\n",
    "                else:\n",
    "                    t66.append(ref1)\n",
    "            else: # factual\n",
    "                if no_conflict:\n",
    "                    f+=1\n",
    "                else:\n",
    "                    f66.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # report the detection reference that did not match exactly one dataset row\n",
    "            print(n-pre)\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 204,
   "id": "a51d5011",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "213 0.9638009049773756\n",
      "2 0.00904977375565611\n",
      "matched num= 442\n"
     ]
    }
   ],
   "source": [
    "# Score detections_7.jsonl against the labels in pubhealth+0.5/dataset_0.5.jsonl.\n",
    "# `num` is not defined in this cell; it must already exist in the kernel (set by an earlier cell).\n",
    "t=0    # label-'true' rows whose answer contains the 'no conflicting parts' phrase\n",
    "f=0    # remaining rows whose answer contains the 'no conflicting parts' phrase\n",
    "t7=[]  # label-'true' references whose answer does not contain that phrase\n",
    "f7=[]  # remaining references whose answer does not contain that phrase\n",
    "n=0    # total number of (detection, dataset row) matches\n",
    "\n",
    "# Parse the dataset once up front instead of re-reading the file for every detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/pubhealth+0.5/dataset_0.5.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # the reference text itself contained 'Reference: '; stitch the two halves back together\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        # drop the 8 trailing characters (presumably the separator before '> Assistant: ' - confirm)\n",
    "        dataset.append((label,ref1[:-8]))\n",
    "\n",
    "with open(\"llama_data/pubhealth+0.5/detections_7.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        # text between 'P1: ' and 'P2: ', minus its last 3 characters\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        key=ref.strip()\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for label,ref1 in dataset:\n",
    "            key1=ref1.strip()\n",
    "            # An exact match always counts; the looser fallbacks are only tried\n",
    "            # while this detection has not been matched to any dataset row yet.\n",
    "            matched=key==key1\n",
    "            if not matched and not flag:\n",
    "                # fallback 1: same second-to-last '.'-separated segment\n",
    "                c1=key.split(\".\")\n",
    "                c2=key1.split(\".\")\n",
    "                matched=len(c1)>2 and len(c2)>2 and c1[-2]==c2[-2]\n",
    "                if not matched:\n",
    "                    # fallback 2: same text before the first pair of backslashes\n",
    "                    matched=key.split(\"\\\\\\\\\")[0]==key1.split(\"\\\\\\\\\")[0]\n",
    "                if not matched:\n",
    "                    # fallback 3: dataset row ends with the detection's tail, taken after the\n",
    "                    # last backslash-u marker and the 4 characters that follow it\n",
    "                    uni=key.split('\\\\u')[-1][4:]\n",
    "                    matched=key1[-len(uni):]==uni\n",
    "            if not matched:\n",
    "                continue\n",
    "            flag=True\n",
    "            n+=1\n",
    "            no_conflict=\"here are no conflicting parts\" in answer\n",
    "            if 'true' in label: # hallucination\n",
    "                if no_conflict:\n",
    "                    t+=1\n",
    "                else:\n",
    "                    t7.append(ref1)\n",
    "            else: # factual\n",
    "                if no_conflict:\n",
    "                    f+=1\n",
    "                else:\n",
    "                    f7.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # report the detection reference that did not match exactly one dataset row\n",
    "            print(n-pre)\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 208,
   "id": "ac60460f",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 1 2\n",
      "0.5339366515837104\n",
      "0.6666666666666666\n",
      "0 1 2\n",
      "0.5339366515837104\n",
      "0.6666666666666666\n",
      "0 1 2\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 1 2\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 1 2\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 1 2\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 1 2\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 1 3\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 1 3\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 1 3\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 1 3\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 1 3\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 1 4\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 1 4\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 1 4\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 1 4\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 1 4\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 1 5\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 1 5\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 1 5\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 1 5\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 1 5\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 1 5\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 1 6\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 1 6\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 1 6\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 1 6\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 1 6\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 1 7\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 1 7\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 1 7\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 1 7\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 1 8\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 1 8\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 1 9\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 1 9\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 1 10\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 2 3\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 2 3\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 2 3\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 2 3\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 2 3\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 2 4\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 2 4\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 2 4\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 2 4\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 2 4\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 2 5\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 2 5\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 2 5\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 2 5\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 2 5\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 2 5\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 2 6\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 2 6\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 2 6\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 2 6\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 2 6\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 2 7\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 2 7\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 2 7\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 2 7\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 2 8\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 2 8\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 2 9\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 2 9\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 2 10\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 3 4\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 3 4\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 3 4\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 3 4\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 3 4\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 3 5\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 3 5\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 3 6\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 3 7\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 3 7\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 3 8\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 3 9\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 3 9\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 3 10\n",
      "0.5361990950226244\n",
      "0.6688206785137318\n",
      "0 4 5\n",
      "0.5384615384615384\n",
      "0.6699029126213593\n",
      "0 4 5\n",
      "0.5384615384615384\n",
      "0.6699029126213593\n",
      "0 4 6\n",
      "0.5384615384615384\n",
      "0.6699029126213593\n",
      "1 2 3\n",
      "0.5384615384615384\n",
      "0.6699029126213593\n",
      "1 2 3\n",
      "0.5384615384615384\n",
      "0.6699029126213593\n",
      "1 2 4\n",
      "0.5384615384615384\n",
      "0.6699029126213593\n",
      "1 2 4\n",
      "0.5407239819004525\n",
      "0.6709886547811994\n",
      "1 2 4\n",
      "0.5407239819004525\n",
      "0.6709886547811994\n",
      "1 2 7\n",
      "0.5407239819004525\n",
      "0.6709886547811994\n",
      "1 4 7\n",
      "0.5407239819004525\n",
      "0.6709886547811994\n",
      "2 4 7\n",
      "0.5407239819004525\n",
      "0.6709886547811994\n"
     ]
    }
   ],
   "source": [
    "# Exhaustive search over every 4-detector subset: union the references each detector\n",
    "# flagged, then report accuracy (TN+TP) and F1 whenever a subset ties or beats the best\n",
    "# accuracy seen so far. Relies on `num` and the t*/f* lists left in the kernel by the\n",
    "# scoring cells above.\n",
    "from itertools import combinations\n",
    "\n",
    "tlist=[t1,t2,t3,t4,t5,t6,t7,t11,t22,t33,t44,t55,t66]\n",
    "flist=[f1,f2,f3,f4,f5,f6,f7,f11,f22,f33,f44,f55,f66]\n",
    "#      0   1  2  3  4  5  6  7   8   9   10  11  12\n",
    "minV=0  # best accuracy so far (a running maximum, despite the name)\n",
    "# combinations() yields index tuples in the same lexicographic order as nested kk<jj<ii<i loops\n",
    "for kk,jj,ii,i in combinations(range(len(tlist)),4):\n",
    "    # unique references flagged by at least one detector of the subset\n",
    "    aa=len(set(tlist[kk]+tlist[jj]+tlist[ii]+tlist[i]))\n",
    "    bb=len(set(flist[kk]+flist[jj]+flist[ii]+flist[i]))\n",
    "    # all four rates are expressed as fractions of 2*num\n",
    "    TN=(num-bb)/(num*2)\n",
    "    TP=aa/(num*2)\n",
    "    FN=(num-aa)/(num*2)\n",
    "    FP=bb/(num*2)\n",
    "    F1=2*TP/(2*TP+FP+FN)\n",
    "    if TN+TP>=minV:\n",
    "        # print all four detector indices so tied subsets can be told apart\n",
    "        print(kk,jj,ii,i)\n",
    "        print(TN+TP)\n",
    "        print(F1)\n",
    "        minV=TN+TP"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 305,
   "id": "97304cd5",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "442\n",
      "10.846153846153847\n",
      "11.805429864253394\n"
     ]
    }
   ],
   "source": [
    "# For every dataset row, count in how many of the run lists (tlist / flist) its reference\n",
    "# appears, then print the row total and the mean count for each of the two label groups.\n",
    "# The row counter and loop variables have their own names so that the global `num`\n",
    "# (the denominator used by the metric cells) and the `t` / `f` counters are not overwritten.\n",
    "fnum=[]\n",
    "tnum=[]\n",
    "total_rows=0\n",
    "with open('llama_data/pubhealth+0.5/dataset_0.5.jsonl') as file1:\n",
    "    for line1 in file1:\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        # A reference that itself contains \"Reference: \" is split in three; glue it back.\n",
    "        if len(ref1)==3:\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        ref1=ref1[:-8]\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        total_rows+=1\n",
    "        if 'true' in label: # same branch that fills the t* lists in the matching cells\n",
    "            tnum.append(sum(ref1 in run for run in tlist))\n",
    "        else:\n",
    "            fnum.append(sum(ref1 in run for run in flist))\n",
    "print(total_rows)\n",
    "print(np.mean(fnum))\n",
    "print(np.mean(tnum))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "id": "5e7f2791",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "206\n",
      "0.9321266968325792\n"
     ]
    }
   ],
   "source": [
    "# Distinct references found in the t5 and t6 lists combined: print their count (aa)\n",
    "# and that count as a fraction of num.\n",
    "res=set(t5+t6)\n",
    "a=list(res)\n",
    "aa=len(a)\n",
    "print(aa)\n",
    "print(aa/num)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 121,
   "id": "a906e362",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "187\n",
      "0.15384615384615385\n"
     ]
    }
   ],
   "source": [
    "# Distinct references found in the f5 and f6 lists combined: print their count (bb)\n",
    "# and the remaining share (num-bb)/num.\n",
    "res=set(f5+f6)\n",
    "a=list(res)\n",
    "bb=len(a)\n",
    "print(bb)\n",
    "print((num-bb)/num)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 122,
   "id": "7be8390d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "FN 0.033936651583710405\n",
      "FP 0.4230769230769231\n",
      "TN 0.07692307692307693\n",
      "TP 0.4660633484162896\n"
     ]
    }
   ],
   "source": [
    "# Confusion-matrix entries as fractions of all_num (= 2*num), derived from the\n",
    "# counts aa and bb computed in the two cells above.\n",
    "all_num=num*2\n",
    "FN=(num-aa)/all_num\n",
    "FP=bb/all_num\n",
    "TN=(num-bb)/all_num\n",
    "TP=aa/all_num\n",
    "for name,value in (('FN',FN),('FP',FP),('TN',TN),('TP',TP)):\n",
    "    print(name,value)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "id": "4fcaf4fe",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.5429864253393666\n",
      "0.5241730279898219\n",
      "0.9321266968325792\n",
      "0.6710097719869706\n"
     ]
    }
   ],
   "source": [
    "# Summary metrics from the TN/TP/FP/FN fractions, printed one per line in the order\n",
    "# accuracy, precision, recall, F1.\n",
    "Accuracy=TN+TP\n",
    "Precision=TP/(TP+FP)\n",
    "Recall=TP/(TP+FN)\n",
    "F1=2*TP/(2*TP+FP+FN)\n",
    "print(Accuracy,Precision,Recall,F1,sep='\\n')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c15a384e",
   "metadata": {},
   "source": [
    "# pubhealth+0.9"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "id": "9795f50d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Denominator for the pubhealth+0.9 rates printed below, (num-t)/num and f/num.\n",
    "# The matching cells report 'matched num= 454', i.e. 2*num, so this is presumably the\n",
    "# number of dataset rows per label group - TODO confirm against dataset_0.9.jsonl.\n",
    "num=227"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "id": "c3bda13c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "105 0.46255506607929514\n",
      "93 0.40969162995594716\n",
      "matched num= 454\n"
     ]
    }
   ],
   "source": [
    "# Match each detection in detections_1.jsonl against the rows of dataset_0.9.jsonl and tally:\n",
    "#   t / f   : matched rows whose answer contains the \"no conflicting parts\" phrase\n",
    "#             (label containing 'true' / any other label)\n",
    "#   t1 / f1 : references of matched rows whose answer lacks that phrase\n",
    "#   n       : total number of (detection, dataset row) matches\n",
    "t=0\n",
    "f=0\n",
    "t1=[]\n",
    "f1=[]\n",
    "n=0\n",
    "\n",
    "# Parse the dataset once up front; it is the same for every detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/pubhealth+0.9/dataset_0.9.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        # A reference that itself contains \"Reference: \" is split in three; glue it back.\n",
    "        if len(ref1)==3:\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        dataset.append((label,ref1[:-8]))\n",
    "\n",
    "with open(\"llama_data/pubhealth+0.9/detections_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        key=ref.strip()\n",
    "        sents=key.split(\".\")\n",
    "        # text after the last literal backslash-u in the raw line, minus the next 4 characters\n",
    "        suffix=key.split('\\\\u')[-1][4:]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for label,ref1 in dataset:\n",
    "            key1=ref1.strip()\n",
    "            hit=key==key1\n",
    "            if not hit and not flag:\n",
    "                # Fallbacks, tried only while this detection has no match yet: same first\n",
    "                # sentence, same second-to-last sentence, same text before the first double\n",
    "                # backslash, or dataset reference ending with `suffix`.\n",
    "                sents1=key1.split(\".\")\n",
    "                hit=(sents[0].strip()==sents1[0].strip()\n",
    "                     or (len(sents)>2 and len(sents1)>2 and sents[-2]==sents1[-2])\n",
    "                     or key.split(\"\\\\\\\\\")[0]==key1.split(\"\\\\\\\\\")[0]\n",
    "                     or key1[-len(suffix):]==suffix)\n",
    "            if hit:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                if 'true' in label: # hallucination\n",
    "                    if no_conflict:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t1.append(ref1)\n",
    "                else: # factual\n",
    "                    if no_conflict:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f1.append(ref1)\n",
    "        # Report every detection that did not match exactly one dataset row.\n",
    "        if n-pre!=1:\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "id": "bc88d94e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "107 0.4713656387665198\n",
      "85 0.3744493392070485\n",
      "matched num= 454\n"
     ]
    }
   ],
   "source": [
    "# Match each detection in detections_1_1.jsonl against the rows of dataset_0.9.jsonl and tally:\n",
    "#   t / f     : matched rows whose answer contains the \"no conflicting parts\" phrase\n",
    "#               (label containing 'true' / any other label)\n",
    "#   t11 / f11 : references of matched rows whose answer lacks that phrase\n",
    "#   n         : total number of (detection, dataset row) matches\n",
    "t=0\n",
    "f=0\n",
    "t11=[]\n",
    "f11=[]\n",
    "n=0\n",
    "\n",
    "# Parse the dataset once up front; it is the same for every detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/pubhealth+0.9/dataset_0.9.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        # A reference that itself contains \"Reference: \" is split in three; glue it back.\n",
    "        if len(ref1)==3:\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        dataset.append((label,ref1[:-8]))\n",
    "\n",
    "with open(\"llama_data/pubhealth+0.9/detections_1_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        key=ref.strip()\n",
    "        sents=key.split(\".\")\n",
    "        # text after the last literal backslash-u in the raw line, minus the next 4 characters\n",
    "        suffix=key.split('\\\\u')[-1][4:]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for label,ref1 in dataset:\n",
    "            key1=ref1.strip()\n",
    "            hit=key==key1\n",
    "            if not hit and not flag:\n",
    "                # Fallbacks, tried only while this detection has no match yet: same first\n",
    "                # sentence, same second-to-last sentence, same text before the first double\n",
    "                # backslash, or dataset reference ending with `suffix`.\n",
    "                sents1=key1.split(\".\")\n",
    "                hit=(sents[0].strip()==sents1[0].strip()\n",
    "                     or (len(sents)>2 and len(sents1)>2 and sents[-2]==sents1[-2])\n",
    "                     or key.split(\"\\\\\\\\\")[0]==key1.split(\"\\\\\\\\\")[0]\n",
    "                     or key1[-len(suffix):]==suffix)\n",
    "            if hit:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                if 'true' in label: # hallucination\n",
    "                    if no_conflict:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t11.append(ref1)\n",
    "                else: # factual\n",
    "                    if no_conflict:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f11.append(ref1)\n",
    "        # Report every detection that did not match exactly one dataset row. Print the\n",
    "        # detection's own reference (ref); ref1 would only be the last dataset row read.\n",
    "        if n-pre!=1:\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "id": "c238970d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "100 0.44052863436123346\n",
      "99 0.43612334801762115\n",
      "matched num= 454\n"
     ]
    }
   ],
   "source": [
    "# Match each detection in detections_2.jsonl against the rows of dataset_0.9.jsonl and tally:\n",
    "#   t / f   : matched rows whose answer contains the \"no conflicting parts\" phrase\n",
    "#             (label containing 'true' / any other label)\n",
    "#   t2 / f2 : references of matched rows whose answer lacks that phrase\n",
    "#   n       : total number of (detection, dataset row) matches\n",
    "t=0\n",
    "f=0\n",
    "t2=[]\n",
    "f2=[]\n",
    "n=0\n",
    "\n",
    "# Parse the dataset once up front; it is the same for every detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/pubhealth+0.9/dataset_0.9.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        # A reference that itself contains \"Reference: \" is split in three; glue it back.\n",
    "        if len(ref1)==3:\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        dataset.append((label,ref1[:-8]))\n",
    "\n",
    "with open(\"llama_data/pubhealth+0.9/detections_2.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        key=ref.strip()\n",
    "        sents=key.split(\".\")\n",
    "        # text after the last literal backslash-u in the raw line, minus the next 4 characters\n",
    "        suffix=key.split('\\\\u')[-1][4:]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for label,ref1 in dataset:\n",
    "            key1=ref1.strip()\n",
    "            hit=key==key1\n",
    "            if not hit and not flag:\n",
    "                # Fallbacks, tried only while this detection has no match yet: same first\n",
    "                # sentence, same second-to-last sentence, same text before the first double\n",
    "                # backslash, or dataset reference ending with `suffix`.\n",
    "                sents1=key1.split(\".\")\n",
    "                hit=(sents[0].strip()==sents1[0].strip()\n",
    "                     or (len(sents)>2 and len(sents1)>2 and sents[-2]==sents1[-2])\n",
    "                     or key.split(\"\\\\\\\\\")[0]==key1.split(\"\\\\\\\\\")[0]\n",
    "                     or key1[-len(suffix):]==suffix)\n",
    "            if hit:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                if 'true' in label: # hallucination\n",
    "                    if no_conflict:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t2.append(ref1)\n",
    "                else: # factual\n",
    "                    if no_conflict:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f2.append(ref1)\n",
    "        # Report every detection that did not match exactly one dataset row. Print the\n",
    "        # detection's own reference (ref); ref1 would only be the last dataset row read.\n",
    "        if n-pre!=1:\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "id": "1c240be8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "111 0.4889867841409692\n",
      "94 0.41409691629955947\n",
      "matched num= 454\n"
     ]
    }
   ],
   "source": [
    "# Match each detection in detections_2_1.jsonl against the rows of dataset_0.9.jsonl and tally:\n",
    "#   t / f     : matched rows whose answer contains the \"no conflicting parts\" phrase\n",
    "#               (label containing 'true' / any other label)\n",
    "#   t22 / f22 : references of matched rows whose answer lacks that phrase\n",
    "#   n         : total number of (detection, dataset row) matches\n",
    "t=0\n",
    "f=0\n",
    "t22=[]\n",
    "f22=[]\n",
    "n=0\n",
    "\n",
    "# Parse the dataset once up front; it is the same for every detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/pubhealth+0.9/dataset_0.9.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        # A reference that itself contains \"Reference: \" is split in three; glue it back.\n",
    "        if len(ref1)==3:\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        dataset.append((label,ref1[:-8]))\n",
    "\n",
    "with open(\"llama_data/pubhealth+0.9/detections_2_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        key=ref.strip()\n",
    "        sents=key.split(\".\")\n",
    "        # text after the last literal backslash-u in the raw line, minus the next 4 characters\n",
    "        suffix=key.split('\\\\u')[-1][4:]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for label,ref1 in dataset:\n",
    "            key1=ref1.strip()\n",
    "            hit=key==key1\n",
    "            if not hit and not flag:\n",
    "                # Fallbacks, tried only while this detection has no match yet: same first\n",
    "                # sentence, same second-to-last sentence, same text before the first double\n",
    "                # backslash, or dataset reference ending with `suffix`.\n",
    "                sents1=key1.split(\".\")\n",
    "                hit=(sents[0].strip()==sents1[0].strip()\n",
    "                     or (len(sents)>2 and len(sents1)>2 and sents[-2]==sents1[-2])\n",
    "                     or key.split(\"\\\\\\\\\")[0]==key1.split(\"\\\\\\\\\")[0]\n",
    "                     or key1[-len(suffix):]==suffix)\n",
    "            if hit:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                if 'true' in label: # hallucination\n",
    "                    if no_conflict:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t22.append(ref1)\n",
    "                else: # factual\n",
    "                    if no_conflict:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f22.append(ref1)\n",
    "        # Report every detection that did not match exactly one dataset row. Print the\n",
    "        # detection's own reference (ref); ref1 would only be the last dataset row read.\n",
    "        if n-pre!=1:\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "id": "a82e2a06",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "105 0.46255506607929514\n",
      "99 0.43612334801762115\n",
      "matched num= 454\n"
     ]
    }
   ],
   "source": [
    "# Match each detection in detections_3.jsonl against the rows of dataset_0.9.jsonl and tally:\n",
    "#   t / f   : matched rows whose answer contains the \"no conflicting parts\" phrase\n",
    "#             (label containing 'true' / any other label)\n",
    "#   t3 / f3 : references of matched rows whose answer lacks that phrase\n",
    "#   n       : total number of (detection, dataset row) matches\n",
    "t=0\n",
    "f=0\n",
    "t3=[]\n",
    "f3=[]\n",
    "n=0\n",
    "\n",
    "# Parse the dataset once up front; it is the same for every detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/pubhealth+0.9/dataset_0.9.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        # A reference that itself contains \"Reference: \" is split in three; glue it back.\n",
    "        if len(ref1)==3:\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        dataset.append((label,ref1[:-8]))\n",
    "\n",
    "with open(\"llama_data/pubhealth+0.9/detections_3.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        key=ref.strip()\n",
    "        sents=key.split(\".\")\n",
    "        # text after the last literal backslash-u in the raw line, minus the next 4 characters\n",
    "        suffix=key.split('\\\\u')[-1][4:]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for label,ref1 in dataset:\n",
    "            key1=ref1.strip()\n",
    "            hit=key==key1\n",
    "            if not hit and not flag:\n",
    "                # Fallbacks, tried only while this detection has no match yet: same first\n",
    "                # sentence, same second-to-last sentence, same text before the first double\n",
    "                # backslash, or dataset reference ending with `suffix`.\n",
    "                sents1=key1.split(\".\")\n",
    "                hit=(sents[0].strip()==sents1[0].strip()\n",
    "                     or (len(sents)>2 and len(sents1)>2 and sents[-2]==sents1[-2])\n",
    "                     or key.split(\"\\\\\\\\\")[0]==key1.split(\"\\\\\\\\\")[0]\n",
    "                     or key1[-len(suffix):]==suffix)\n",
    "            if hit:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                if 'true' in label: # hallucination\n",
    "                    if no_conflict:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t3.append(ref1)\n",
    "                else: # factual\n",
    "                    if no_conflict:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f3.append(ref1)\n",
    "        # Report every detection that did not match exactly one dataset row. Print the\n",
    "        # detection's own reference (ref); ref1 would only be the last dataset row read.\n",
    "        if n-pre!=1:\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "id": "9f4e246e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "106 0.4669603524229075\n",
      "100 0.44052863436123346\n",
      "matched num= 454\n"
     ]
    }
   ],
   "source": [
    "# Score detections_3_1.jsonl against the labelled pubhealth+0.9 dataset.\n",
    "# NOTE: `num` is read at the bottom but never assigned in this cell; an earlier cell must define it.\n",
    "t=0  # rows labelled 'true' (hallucination) whose answer contains the no-conflict phrase\n",
    "f=0  # remaining (factual) rows whose answer contains the no-conflict phrase\n",
    "t33=[]  # hallucination references whose answer lacks the no-conflict phrase\n",
    "f33=[]  # factual references whose answer lacks the no-conflict phrase\n",
    "n=0  # total number of detection/dataset matches\n",
    "# Parse the dataset once up front instead of re-reading the file for every detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/pubhealth+0.9/dataset_0.9.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # two \"Reference: \" markers: keep everything after the first one\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        # drop the 8 trailing characters (presumably prompt formatting; TODO confirm)\n",
    "        dataset.append((label,ref1[:-8]))\n",
    "with open(\"llama_data/pubhealth+0.9/detections_3_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        # pieces of the detection reference used by the fallback comparisons below\n",
    "        key=ref.strip()\n",
    "        key_parts=key.split(\".\")\n",
    "        key_head=key.split(\"\\\\\\\\\")[0]\n",
    "        uni=key.split('\\\\u')[-1][4:]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for label,ref1 in dataset:\n",
    "            cand=ref1.strip()\n",
    "            # an exact match always counts, even if this detection already matched\n",
    "            matched=key==cand\n",
    "            if not matched and not flag:\n",
    "                # Fallbacks, tried only while this detection has no match yet:\n",
    "                # equal first '.'-segment, equal second-to-last '.'-segment,\n",
    "                # equal text before the first double backslash, or the dataset\n",
    "                # reference ends with the detection's tail after its last\n",
    "                # backslash-u marker (minus the 4 characters that follow it).\n",
    "                cand_parts=cand.split(\".\")\n",
    "                matched=(\n",
    "                    key_parts[0].strip()==cand_parts[0].strip()\n",
    "                    or (len(key_parts)>2 and len(cand_parts)>2 and key_parts[-2]==cand_parts[-2])\n",
    "                    or key_head==cand.split(\"\\\\\\\\\")[0]\n",
    "                    or cand[-len(uni):]==uni\n",
    "                )\n",
    "            if matched:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                if 'true' in label: # hallucination\n",
    "                    if no_conflict:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t33.append(ref1)\n",
    "                else: # factual\n",
    "                    if no_conflict:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f33.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # zero or several matches: show this detection's own reference for inspection\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "id": "02e4dd61",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "97 0.42731277533039647\n",
      "119 0.5242290748898678\n",
      "matched num= 454\n"
     ]
    }
   ],
   "source": [
    "# Score detections_4.jsonl against the labelled pubhealth+0.9 dataset.\n",
    "# NOTE: `num` is read at the bottom but never assigned in this cell; an earlier cell must define it.\n",
    "t=0  # rows labelled 'true' (hallucination) whose answer contains the no-conflict phrase\n",
    "f=0  # remaining (factual) rows whose answer contains the no-conflict phrase\n",
    "t4=[]  # hallucination references whose answer lacks the no-conflict phrase\n",
    "f4=[]  # factual references whose answer lacks the no-conflict phrase\n",
    "n=0  # total number of detection/dataset matches\n",
    "# Parse the dataset once up front instead of re-reading the file for every detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/pubhealth+0.9/dataset_0.9.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # two \"Reference: \" markers: keep everything after the first one\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        # drop the 8 trailing characters (presumably prompt formatting; TODO confirm)\n",
    "        dataset.append((label,ref1[:-8]))\n",
    "with open(\"llama_data/pubhealth+0.9/detections_4.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        # pieces of the detection reference used by the fallback comparisons below\n",
    "        key=ref.strip()\n",
    "        key_parts=key.split(\".\")\n",
    "        key_head=key.split(\"\\\\\\\\\")[0]\n",
    "        uni=key.split('\\\\u')[-1][4:]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for label,ref1 in dataset:\n",
    "            cand=ref1.strip()\n",
    "            # an exact match always counts, even if this detection already matched\n",
    "            matched=key==cand\n",
    "            if not matched and not flag:\n",
    "                # Fallbacks, tried only while this detection has no match yet:\n",
    "                # equal first '.'-segment, equal second-to-last '.'-segment,\n",
    "                # equal text before the first double backslash, or the dataset\n",
    "                # reference ends with the detection's tail after its last\n",
    "                # backslash-u marker (minus the 4 characters that follow it).\n",
    "                cand_parts=cand.split(\".\")\n",
    "                matched=(\n",
    "                    key_parts[0].strip()==cand_parts[0].strip()\n",
    "                    or (len(key_parts)>2 and len(cand_parts)>2 and key_parts[-2]==cand_parts[-2])\n",
    "                    or key_head==cand.split(\"\\\\\\\\\")[0]\n",
    "                    or cand[-len(uni):]==uni\n",
    "                )\n",
    "            if matched:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                if 'true' in label: # hallucination\n",
    "                    if no_conflict:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t4.append(ref1)\n",
    "                else: # factual\n",
    "                    if no_conflict:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f4.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # zero or several matches: show this detection's own reference for inspection\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "id": "946494af",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "96 0.42290748898678415\n",
      "110 0.4845814977973568\n",
      "matched num= 454\n"
     ]
    }
   ],
   "source": [
    "# Score detections_4_1.jsonl against the labelled pubhealth+0.9 dataset.\n",
    "# NOTE: `num` is read at the bottom but never assigned in this cell; an earlier cell must define it.\n",
    "t=0  # rows labelled 'true' (hallucination) whose answer contains the no-conflict phrase\n",
    "f=0  # remaining (factual) rows whose answer contains the no-conflict phrase\n",
    "t44=[]  # hallucination references whose answer lacks the no-conflict phrase\n",
    "f44=[]  # factual references whose answer lacks the no-conflict phrase\n",
    "n=0  # total number of detection/dataset matches\n",
    "# Parse the dataset once up front instead of re-reading the file for every detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/pubhealth+0.9/dataset_0.9.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # two \"Reference: \" markers: keep everything after the first one\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        # drop the 8 trailing characters (presumably prompt formatting; TODO confirm)\n",
    "        dataset.append((label,ref1[:-8]))\n",
    "with open(\"llama_data/pubhealth+0.9/detections_4_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        # pieces of the detection reference used by the fallback comparisons below\n",
    "        key=ref.strip()\n",
    "        key_parts=key.split(\".\")\n",
    "        key_head=key.split(\"\\\\\\\\\")[0]\n",
    "        uni=key.split('\\\\u')[-1][4:]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for label,ref1 in dataset:\n",
    "            cand=ref1.strip()\n",
    "            # an exact match always counts, even if this detection already matched\n",
    "            matched=key==cand\n",
    "            if not matched and not flag:\n",
    "                # Fallbacks, tried only while this detection has no match yet:\n",
    "                # equal first '.'-segment, equal second-to-last '.'-segment,\n",
    "                # equal text before the first double backslash, or the dataset\n",
    "                # reference ends with the detection's tail after its last\n",
    "                # backslash-u marker (minus the 4 characters that follow it).\n",
    "                cand_parts=cand.split(\".\")\n",
    "                matched=(\n",
    "                    key_parts[0].strip()==cand_parts[0].strip()\n",
    "                    or (len(key_parts)>2 and len(cand_parts)>2 and key_parts[-2]==cand_parts[-2])\n",
    "                    or key_head==cand.split(\"\\\\\\\\\")[0]\n",
    "                    or cand[-len(uni):]==uni\n",
    "                )\n",
    "            if matched:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                if 'true' in label: # hallucination\n",
    "                    if no_conflict:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t44.append(ref1)\n",
    "                else: # factual\n",
    "                    if no_conflict:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f44.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # zero or several matches: show this detection's own reference for inspection\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "id": "54427f95",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "125 0.5506607929515418\n",
      "92 0.4052863436123348\n",
      "matched num= 454\n"
     ]
    }
   ],
   "source": [
    "# Score detections_5.jsonl against the labelled pubhealth+0.9 dataset.\n",
    "# NOTE: `num` is read at the bottom but never assigned in this cell; an earlier cell must define it.\n",
    "t=0  # rows labelled 'true' (hallucination) whose answer contains the no-conflict phrase\n",
    "f=0  # remaining (factual) rows whose answer contains the no-conflict phrase\n",
    "t5=[]  # hallucination references whose answer lacks the no-conflict phrase\n",
    "f5=[]  # factual references whose answer lacks the no-conflict phrase\n",
    "n=0  # total number of detection/dataset matches\n",
    "# Parse the dataset once up front instead of re-reading the file for every detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/pubhealth+0.9/dataset_0.9.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # two \"Reference: \" markers: keep everything after the first one\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        # drop the 8 trailing characters (presumably prompt formatting; TODO confirm)\n",
    "        dataset.append((label,ref1[:-8]))\n",
    "with open(\"llama_data/pubhealth+0.9/detections_5.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        # pieces of the detection reference used by the fallback comparisons below\n",
    "        key=ref.strip()\n",
    "        key_parts=key.split(\".\")\n",
    "        key_head=key.split(\"\\\\\\\\\")[0]\n",
    "        uni=key.split('\\\\u')[-1][4:]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for label,ref1 in dataset:\n",
    "            cand=ref1.strip()\n",
    "            # an exact match always counts, even if this detection already matched\n",
    "            matched=key==cand\n",
    "            if not matched and not flag:\n",
    "                # Fallbacks, tried only while this detection has no match yet:\n",
    "                # equal first '.'-segment, equal second-to-last '.'-segment,\n",
    "                # equal text before the first double backslash, or the dataset\n",
    "                # reference ends with the detection's tail after its last\n",
    "                # backslash-u marker (minus the 4 characters that follow it).\n",
    "                cand_parts=cand.split(\".\")\n",
    "                matched=(\n",
    "                    key_parts[0].strip()==cand_parts[0].strip()\n",
    "                    or (len(key_parts)>2 and len(cand_parts)>2 and key_parts[-2]==cand_parts[-2])\n",
    "                    or key_head==cand.split(\"\\\\\\\\\")[0]\n",
    "                    or cand[-len(uni):]==uni\n",
    "                )\n",
    "            if matched:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                if 'true' in label: # hallucination\n",
    "                    if no_conflict:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t5.append(ref1)\n",
    "                else: # factual\n",
    "                    if no_conflict:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f5.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # zero or several matches: show this detection's own reference for inspection\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "id": "f61fda4c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "131 0.5770925110132159\n",
      "94 0.41409691629955947\n",
      "matched num= 454\n"
     ]
    }
   ],
   "source": [
    "# Score detections_5_1.jsonl against the labelled pubhealth+0.9 dataset.\n",
    "# NOTE: `num` is read at the bottom but never assigned in this cell; an earlier cell must define it.\n",
    "t=0  # rows labelled 'true' (hallucination) whose answer contains the no-conflict phrase\n",
    "f=0  # remaining (factual) rows whose answer contains the no-conflict phrase\n",
    "t55=[]  # hallucination references whose answer lacks the no-conflict phrase\n",
    "f55=[]  # factual references whose answer lacks the no-conflict phrase\n",
    "n=0  # total number of detection/dataset matches\n",
    "# Parse the dataset once up front instead of re-reading the file for every detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/pubhealth+0.9/dataset_0.9.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # two \"Reference: \" markers: keep everything after the first one\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        # drop the 8 trailing characters (presumably prompt formatting; TODO confirm)\n",
    "        dataset.append((label,ref1[:-8]))\n",
    "with open(\"llama_data/pubhealth+0.9/detections_5_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        # pieces of the detection reference used by the fallback comparisons below\n",
    "        key=ref.strip()\n",
    "        key_parts=key.split(\".\")\n",
    "        key_head=key.split(\"\\\\\\\\\")[0]\n",
    "        uni=key.split('\\\\u')[-1][4:]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for label,ref1 in dataset:\n",
    "            cand=ref1.strip()\n",
    "            # an exact match always counts, even if this detection already matched\n",
    "            matched=key==cand\n",
    "            if not matched and not flag:\n",
    "                # Fallbacks, tried only while this detection has no match yet:\n",
    "                # equal first '.'-segment, equal second-to-last '.'-segment,\n",
    "                # equal text before the first double backslash, or the dataset\n",
    "                # reference ends with the detection's tail after its last\n",
    "                # backslash-u marker (minus the 4 characters that follow it).\n",
    "                cand_parts=cand.split(\".\")\n",
    "                matched=(\n",
    "                    key_parts[0].strip()==cand_parts[0].strip()\n",
    "                    or (len(key_parts)>2 and len(cand_parts)>2 and key_parts[-2]==cand_parts[-2])\n",
    "                    or key_head==cand.split(\"\\\\\\\\\")[0]\n",
    "                    or cand[-len(uni):]==uni\n",
    "                )\n",
    "            if matched:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                if 'true' in label: # hallucination\n",
    "                    if no_conflict:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t55.append(ref1)\n",
    "                else: # factual\n",
    "                    if no_conflict:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f55.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # zero or several matches: show this detection's own reference for inspection\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "id": "9fb2cb85",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "97 0.42731277533039647\n",
      "110 0.4845814977973568\n",
      "matched num= 454\n"
     ]
    }
   ],
   "source": [
    "# Score the detections_6 run against the pubhealth+0.9 dataset.\n",
    "# t / f   : hallucinated / factual samples whose answer contains the no-conflict phrase.\n",
    "# t6 / f6 : references of hallucinated / factual samples whose answer does not.\n",
    "t=0\n",
    "f=0\n",
    "t6=[]\n",
    "f6=[]\n",
    "n=0\n",
    "# Parse the dataset once up front instead of re-reading the file for every detection line.\n",
    "dataset_refs=[]\n",
    "with open(\"llama_data/pubhealth+0.9/dataset_0.9.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # the marker occurs twice: keep everything after its first occurrence\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        ref1=ref1[:-8]\n",
    "        dataset_refs.append((ref1,label))\n",
    "with open(\"llama_data/pubhealth+0.9/detections_6.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        ref_key=ref.strip()\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        pre=n\n",
    "        flag=False  # becomes True once any dataset entry has matched this detection\n",
    "        for ref1,label in dataset_refs:\n",
    "            ref1_key=ref1.strip()\n",
    "            # An exact match is always counted; the fuzzy fallbacks only run while\n",
    "            # nothing has matched this detection yet.\n",
    "            matched=ref_key==ref1_key\n",
    "            if not matched and not flag:\n",
    "                c1=ref_key.split(\".\")\n",
    "                c2=ref1_key.split(\".\")\n",
    "                uni=ref_key.split('\\\\u')[-1][4:]\n",
    "                matched=(\n",
    "                    # same text before the first period\n",
    "                    c1[0].strip()==c2[0].strip()\n",
    "                    # same second-to-last period-separated piece\n",
    "                    or (len(c1)>2 and len(c2)>2 and c1[-2]==c2[-2])\n",
    "                    # same text before the first double backslash\n",
    "                    or ref_key.split(\"\\\\\\\\\")[0]==ref1_key.split(\"\\\\\\\\\")[0]\n",
    "                    # dataset text ends with the detection text that follows its last unicode escape\n",
    "                    or ref1_key[-len(uni):]==uni\n",
    "                )\n",
    "            if matched:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                if 'true' in label: # hallucination\n",
    "                    if no_conflict:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t6.append(ref1)\n",
    "                else: # factual\n",
    "                    if no_conflict:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f6.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # Zero or several dataset entries matched: show the detection reference concerned\n",
    "            # (the loop variable ref1 would always be the last dataset entry).\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "id": "a18d18c7",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "100 0.44052863436123346\n",
      "90 0.3964757709251101\n",
      "matched num= 454\n"
     ]
    }
   ],
   "source": [
    "# Score the detections_6_1 run against the pubhealth+0.9 dataset.\n",
    "# t / f     : hallucinated / factual samples whose answer contains the no-conflict phrase.\n",
    "# t66 / f66 : references of hallucinated / factual samples whose answer does not.\n",
    "t=0\n",
    "f=0\n",
    "t66=[]\n",
    "f66=[]\n",
    "n=0\n",
    "# Parse the dataset once up front instead of re-reading the file for every detection line.\n",
    "dataset_refs=[]\n",
    "with open(\"llama_data/pubhealth+0.9/dataset_0.9.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # the marker occurs twice: keep everything after its first occurrence\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        ref1=ref1[:-8]\n",
    "        dataset_refs.append((ref1,label))\n",
    "with open(\"llama_data/pubhealth+0.9/detections_6_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        ref_key=ref.strip()\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        pre=n\n",
    "        flag=False  # becomes True once any dataset entry has matched this detection\n",
    "        for ref1,label in dataset_refs:\n",
    "            ref1_key=ref1.strip()\n",
    "            # An exact match is always counted; the fuzzy fallbacks only run while\n",
    "            # nothing has matched this detection yet.\n",
    "            matched=ref_key==ref1_key\n",
    "            if not matched and not flag:\n",
    "                c1=ref_key.split(\".\")\n",
    "                c2=ref1_key.split(\".\")\n",
    "                uni=ref_key.split('\\\\u')[-1][4:]\n",
    "                matched=(\n",
    "                    # same text before the first period\n",
    "                    c1[0].strip()==c2[0].strip()\n",
    "                    # same second-to-last period-separated piece\n",
    "                    or (len(c1)>2 and len(c2)>2 and c1[-2]==c2[-2])\n",
    "                    # same text before the first double backslash\n",
    "                    or ref_key.split(\"\\\\\\\\\")[0]==ref1_key.split(\"\\\\\\\\\")[0]\n",
    "                    # dataset text ends with the detection text that follows its last unicode escape\n",
    "                    or ref1_key[-len(uni):]==uni\n",
    "                )\n",
    "            if matched:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                if 'true' in label: # hallucination\n",
    "                    if no_conflict:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t66.append(ref1)\n",
    "                else: # factual\n",
    "                    if no_conflict:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f66.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # Zero or several dataset entries matched: show the detection reference concerned\n",
    "            # (the loop variable ref1 would always be the last dataset entry).\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "id": "da70b584",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "109 0.4801762114537445\n",
      "92 0.4052863436123348\n",
      "matched num= 454\n"
     ]
    }
   ],
   "source": [
    "# Score the detections_7 run against the pubhealth+0.9 dataset.\n",
    "# t / f   : hallucinated / factual samples whose answer contains the no-conflict phrase.\n",
    "# t7 / f7 : references of hallucinated / factual samples whose answer does not.\n",
    "t=0\n",
    "f=0\n",
    "t7=[]\n",
    "f7=[]\n",
    "n=0\n",
    "# Parse the dataset once up front instead of re-reading the file for every detection line.\n",
    "dataset_refs=[]\n",
    "with open(\"llama_data/pubhealth+0.9/dataset_0.9.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # the marker occurs twice: keep everything after its first occurrence\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        ref1=ref1[:-8]\n",
    "        dataset_refs.append((ref1,label))\n",
    "with open(\"llama_data/pubhealth+0.9/detections_7.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        ref_key=ref.strip()\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        pre=n\n",
    "        flag=False  # becomes True once any dataset entry has matched this detection\n",
    "        for ref1,label in dataset_refs:\n",
    "            ref1_key=ref1.strip()\n",
    "            # An exact match is always counted; the fuzzy fallbacks only run while\n",
    "            # nothing has matched this detection yet.\n",
    "            matched=ref_key==ref1_key\n",
    "            if not matched and not flag:\n",
    "                c1=ref_key.split(\".\")\n",
    "                c2=ref1_key.split(\".\")\n",
    "                uni=ref_key.split('\\\\u')[-1][4:]\n",
    "                matched=(\n",
    "                    # same text before the first period\n",
    "                    c1[0].strip()==c2[0].strip()\n",
    "                    # same second-to-last period-separated piece\n",
    "                    or (len(c1)>2 and len(c2)>2 and c1[-2]==c2[-2])\n",
    "                    # same text before the first double backslash\n",
    "                    or ref_key.split(\"\\\\\\\\\")[0]==ref1_key.split(\"\\\\\\\\\")[0]\n",
    "                    # dataset text ends with the detection text that follows its last unicode escape\n",
    "                    or ref1_key[-len(uni):]==uni\n",
    "                )\n",
    "            if matched:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                if 'true' in label: # hallucination\n",
    "                    if no_conflict:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t7.append(ref1)\n",
    "                else: # factual\n",
    "                    if no_conflict:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f7.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # Zero or several dataset entries matched: show the detection reference concerned\n",
    "            # (the loop variable ref1 would always be the last dataset entry).\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "id": "293f5f80",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 1 2 3 4 5 6 7 8 9\n",
      "0.5176211453744494\n",
      "0.6604651162790697\n"
     ]
    }
   ],
   "source": [
    "import itertools\n",
    "\n",
    "tlist=[t1,t2,t3,t4,t5,t6,t7,t11,t22,t33,t44,t55,t66]\n",
    "flist=[f1,f2,f3,f4,f5,f6,f7,f11,f22,f33,f44,f55,f66]\n",
    "#      0   1  2  3  4  5  6  7   8   9   10  11  12\n",
    "# Evaluate every ensemble of subset_size runs (indices into tlist / flist, visited in\n",
    "# lexicographic order) and report each one whose accuracy ties or beats the best so far.\n",
    "# Loop-local names are prefixed with combo_ so the search does not overwrite the\n",
    "# notebook-level n, aa, bb, TN, TP, FN, FP and F1 used by other cells.\n",
    "subset_size=12\n",
    "minV=0  # best accuracy (TN+TP) seen so far\n",
    "for combo in itertools.combinations(range(len(tlist)),subset_size):\n",
    "    # distinct references flagged by at least one run of the ensemble\n",
    "    combo_aa=len(set().union(*(tlist[c] for c in combo)))\n",
    "    combo_bb=len(set().union(*(flist[c] for c in combo)))\n",
    "    combo_TN=(num-combo_bb)/(num*2)\n",
    "    combo_TP=combo_aa/(num*2)\n",
    "    combo_FN=(num-combo_aa)/(num*2)\n",
    "    combo_FP=combo_bb/(num*2)\n",
    "    combo_F1=2*combo_TP/(2*combo_TP+combo_FP+combo_FN)\n",
    "    if combo_TN+combo_TP>=minV:\n",
    "        # print every selected index so the reported ensemble is unambiguous\n",
    "        print(*combo)\n",
    "        print(combo_TN+combo_TP)\n",
    "        print(combo_F1)\n",
    "        minV=combo_TN+combo_TP"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 324,
   "id": "49cc0adf",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "454\n",
      "6.418502202643172\n",
      "6.418502202643172\n"
     ]
    }
   ],
   "source": [
    "# For every dataset sample, count how many of the runs in tlist / flist contain its\n",
    "# reference, then report the number of samples and the mean count per label group.\n",
    "fnum=[]\n",
    "tnum=[]\n",
    "knum=0\n",
    "with open('llama_data/pubhealth+0.9/dataset_0.9.jsonl') as file1: \n",
    "    for line1 in file1:\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # the marker occurs twice: keep everything after its first occurrence\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        ref1=ref1[:-8]\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        knum+=1\n",
    "        # The loop names below deliberately differ from the global counters t and f.\n",
    "        if 'true' in label: # hallucination (same convention as the scoring cells)\n",
    "            tnum.append(sum(ref1 in run_refs for run_refs in tlist))\n",
    "        else: # factual\n",
    "            fnum.append(sum(ref1 in run_refs for run_refs in flist))\n",
    "print(knum)\n",
    "print(np.mean(fnum))\n",
    "print(np.mean(tnum))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "id": "1fbf4f02",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "213\n",
      "0.9383259911894273\n"
     ]
    }
   ],
   "source": [
    "# Distinct references collected in the t* lists of all 13 runs.\n",
    "res=set(t1+t2+t3+t4+t5+t6+t7+t11+t22+t33+t44+t55+t66)\n",
    "a=list(res)\n",
    "aa=len(a)\n",
    "print(aa)\n",
    "print(aa/num)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "id": "d26b5d9f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "206\n",
      "0.09251101321585903\n"
     ]
    }
   ],
   "source": [
    "# Distinct references collected in the f* lists of all 13 runs.\n",
    "res=set(f1+f2+f3+f4+f5+f6+f7+f11+f22+f33+f44+f55+f66)\n",
    "a=list(res)\n",
    "bb=len(a)\n",
    "print(bb)\n",
    "print((num-bb)/num)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "id": "bf3167ff",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "FN 0.030837004405286344\n",
      "FP 0.45374449339207046\n",
      "TN 0.046255506607929514\n",
      "TP 0.46916299559471364\n"
     ]
    }
   ],
   "source": [
    "# Confusion-matrix entries as fractions of all_num (= 2*num), derived from aa and bb.\n",
    "all_num=2*num\n",
    "FN=(num-aa)/all_num  # false negatives\n",
    "print(f'FN {FN}')\n",
    "FP=bb/all_num  # false positives\n",
    "print(f'FP {FP}')\n",
    "TN=(num-bb)/all_num  # true negatives\n",
    "print(f'TN {TN}')\n",
    "TP=aa/all_num  # true positives\n",
    "print(f'TP {TP}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "id": "c325319a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.5154185022026432\n",
      "0.5083532219570406\n",
      "0.9383259911894273\n",
      "0.6594427244582043\n"
     ]
    }
   ],
   "source": [
    "# Summary metrics computed from the TP / TN / FP / FN fractions.\n",
    "Accuracy=TP+TN\n",
    "print(f'{Accuracy}')\n",
    "Precision=TP/(FP+TP)\n",
    "print(f'{Precision}')\n",
    "Recall=TP/(FN+TP)\n",
    "print(f'{Recall}')\n",
    "F1=2*TP/(2*TP+FP+FN)\n",
    "print(f'{F1}')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ffcb03f8",
   "metadata": {},
   "source": [
    "# wice+0.1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 121,
   "id": "1a01bbfa",
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): looks like the per-class sample count for wice+0.1 (the scoring cells\n",
    "# below report 496 = 2*248 matches) - confirm against the dataset.\n",
    "num=248"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 122,
   "id": "13025b4c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "191 0.7701612903225806\n",
      "65 0.2620967741935484\n",
      "matched num= 496\n"
     ]
    }
   ],
   "source": [
    "# Score the combine/detections_1_2 run against the wice+0.1 dataset.\n",
    "# t / f   : hallucinated / factual samples whose answer contains the no-conflict phrase.\n",
    "# t1 / f1 : references of hallucinated / factual samples whose answer does not.\n",
    "t=0\n",
    "f=0\n",
    "t1=[]\n",
    "f1=[]\n",
    "n=0\n",
    "# Parse the dataset once up front instead of re-reading the file for every detection line.\n",
    "dataset_refs=[]\n",
    "with open(\"llama_data/wice+0.1/dataset_0.1.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # the marker occurs twice: keep everything after its first occurrence\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        ref1=ref1[:-8]\n",
    "        dataset_refs.append((ref1,label))\n",
    "with open(\"llama_data/wice+0.1/combine/detections_1_2.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        ref_key=ref.strip()\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        pre=n\n",
    "        flag=False  # becomes True once any dataset entry has matched this detection\n",
    "        for ref1,label in dataset_refs:\n",
    "            ref1_key=ref1.strip()\n",
    "            # An exact match is always counted; the fuzzy fallbacks only run while\n",
    "            # nothing has matched this detection yet.\n",
    "            matched=ref_key==ref1_key\n",
    "            if not matched and not flag:\n",
    "                c1=ref_key.split(\".\")\n",
    "                c2=ref1_key.split(\".\")\n",
    "                uni=ref_key.split('\\\\u')[-1][4:]\n",
    "                matched=(\n",
    "                    # same second-to-last period-separated piece; the length guard avoids an\n",
    "                    # IndexError when either text contains no period\n",
    "                    (len(c1)>=2 and len(c2)>=2 and c1[-2].strip()==c2[-2].strip())\n",
    "                    # same text before the first double backslash\n",
    "                    or ref_key.split(\"\\\\\\\\\")[0]==ref1_key.split(\"\\\\\\\\\")[0]\n",
    "                    # dataset text ends with the detection text that follows its last unicode escape\n",
    "                    or ref1_key[-len(uni):]==uni\n",
    "                )\n",
    "            if matched:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                if 'true' in label: # hallucination\n",
    "                    if no_conflict:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t1.append(ref1)\n",
    "                else: # factual\n",
    "                    if no_conflict:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f1.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # Zero or several dataset entries matched this detection: report how many and which.\n",
    "            print(n-pre)\n",
    "            print(ref)\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "id": "d45f3ab2",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "246 0.9919354838709677\n",
      "5 0.020161290322580645\n",
      "matched num= 496\n"
     ]
    }
   ],
   "source": [
    "# Score the detections_1_1 run against the wice+0.1 dataset.\n",
    "# t / f     : hallucinated / factual samples whose answer contains the no-conflict phrase.\n",
    "# t11 / f11 : references of hallucinated / factual samples whose answer does not.\n",
    "t=0\n",
    "f=0\n",
    "t11=[]\n",
    "f11=[]\n",
    "n=0\n",
    "# Parse the dataset once up front instead of re-reading the file for every detection line.\n",
    "dataset_refs=[]\n",
    "with open(\"llama_data/wice+0.1/dataset_0.1.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # the marker occurs twice: keep everything after its first occurrence\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        ref1=ref1[:-8]\n",
    "        dataset_refs.append((ref1,label))\n",
    "with open(\"llama_data/wice+0.1/detections_1_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        ref_key=ref.strip()\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        pre=n\n",
    "        flag=False  # becomes True once any dataset entry has matched this detection\n",
    "        for ref1,label in dataset_refs:\n",
    "            ref1_key=ref1.strip()\n",
    "            # An exact match is always counted; the fuzzy fallbacks only run while\n",
    "            # nothing has matched this detection yet.\n",
    "            matched=ref_key==ref1_key\n",
    "            if not matched and not flag:\n",
    "                c1=ref_key.split(\".\")\n",
    "                c2=ref1_key.split(\".\")\n",
    "                uni=ref_key.split('\\\\u')[-1][4:]\n",
    "                matched=(\n",
    "                    # same second-to-last period-separated piece; the length guard avoids an\n",
    "                    # IndexError when either text contains no period\n",
    "                    (len(c1)>=2 and len(c2)>=2 and c1[-2].strip()==c2[-2].strip())\n",
    "                    # same text before the first double backslash\n",
    "                    or ref_key.split(\"\\\\\\\\\")[0]==ref1_key.split(\"\\\\\\\\\")[0]\n",
    "                    # dataset text ends with the detection text that follows its last unicode escape\n",
    "                    or ref1_key[-len(uni):]==uni\n",
    "                )\n",
    "            if matched:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                if 'true' in label: # hallucination\n",
    "                    if no_conflict:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t11.append(ref1)\n",
    "                else: # factual\n",
    "                    if no_conflict:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f11.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # Zero or several dataset entries matched: show the detection reference concerned.\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 125,
   "id": "f249341a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "238 0.9596774193548387\n",
      "16 0.06451612903225806\n",
      "matched num= 496\n"
     ]
    }
   ],
   "source": [
    "# Score detections_2.jsonl against the labelled wice+0.1 dataset.\n",
    "# t / f: answers containing the no-conflict phrase on rows labelled 'true' (hallucination) / other rows (factual).\n",
    "# t2 / f2: dataset references whose answer lacks that phrase.\n",
    "# num is read from the notebook namespace; it is not defined in this cell.\n",
    "t=0\n",
    "f=0\n",
    "t2=[]\n",
    "f2=[]\n",
    "n=0\n",
    "# Parse the dataset once instead of re-opening and re-splitting it for every detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/wice+0.1/dataset_0.1.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # the marker occurred twice: re-join the last two pieces around it\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        ref1=ref1[:-8]\n",
    "        dataset.append((ref1,'true' in label))\n",
    "with open(\"llama_data/wice+0.1/detections_2.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        key=ref.strip()\n",
    "        key_parts=key.split(\".\")\n",
    "        key_head=key.split(\"\\\\\\\\\")[0]\n",
    "        uni=key.split('\\\\u')[-1][4:]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for ref1,is_hallucination in dataset:\n",
    "            cand=ref1.strip()\n",
    "            if key==cand:\n",
    "                matched=True\n",
    "            elif flag:\n",
    "                # fallback rules are only tried until this detection has its first match\n",
    "                matched=False\n",
    "            else:\n",
    "                cand_parts=cand.split(\".\")\n",
    "                matched=(\n",
    "                    # same second-to-last '.'-separated piece\n",
    "                    (len(key_parts)>2 and len(cand_parts)>2 and key_parts[-2]==cand_parts[-2])\n",
    "                    # same text before the first double backslash\n",
    "                    or key_head==cand.split(\"\\\\\\\\\")[0]\n",
    "                    # same tail after the last backslash-u marker, skipping the 4 characters that follow it\n",
    "                    or cand[-len(uni):]==uni\n",
    "                )\n",
    "            if not matched:\n",
    "                continue\n",
    "            flag=True\n",
    "            n+=1\n",
    "            if no_conflict:\n",
    "                if is_hallucination:\n",
    "                    t+=1\n",
    "                else:\n",
    "                    f+=1\n",
    "            elif is_hallucination:\n",
    "                t2.append(ref1)\n",
    "            else:\n",
    "                f2.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # show the detection reference that matched zero or several dataset rows\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 126,
   "id": "457d76a1",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "225 0.907258064516129\n",
      "17 0.06854838709677419\n",
      "matched num= 496\n"
     ]
    }
   ],
   "source": [
    "# Score detections_2_1.jsonl against the labelled wice+0.1 dataset.\n",
    "# t / f: answers containing the no-conflict phrase on rows labelled 'true' (hallucination) / other rows (factual).\n",
    "# t22 / f22: dataset references whose answer lacks that phrase.\n",
    "# num is read from the notebook namespace; it is not defined in this cell.\n",
    "t=0\n",
    "f=0\n",
    "t22=[]\n",
    "f22=[]\n",
    "n=0\n",
    "# Parse the dataset once instead of re-opening and re-splitting it for every detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/wice+0.1/dataset_0.1.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # the marker occurred twice: re-join the last two pieces around it\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        ref1=ref1[:-8]\n",
    "        dataset.append((ref1,'true' in label))\n",
    "with open(\"llama_data/wice+0.1/detections_2_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        key=ref.strip()\n",
    "        key_parts=key.split(\".\")\n",
    "        key_head=key.split(\"\\\\\\\\\")[0]\n",
    "        uni=key.split('\\\\u')[-1][4:]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for ref1,is_hallucination in dataset:\n",
    "            cand=ref1.strip()\n",
    "            if key==cand:\n",
    "                matched=True\n",
    "            elif flag:\n",
    "                # fallback rules are only tried until this detection has its first match\n",
    "                matched=False\n",
    "            else:\n",
    "                cand_parts=cand.split(\".\")\n",
    "                matched=(\n",
    "                    # same second-to-last '.'-separated piece\n",
    "                    (len(key_parts)>2 and len(cand_parts)>2 and key_parts[-2]==cand_parts[-2])\n",
    "                    # same text before the first double backslash\n",
    "                    or key_head==cand.split(\"\\\\\\\\\")[0]\n",
    "                    # same tail after the last backslash-u marker, skipping the 4 characters that follow it\n",
    "                    or cand[-len(uni):]==uni\n",
    "                )\n",
    "            if not matched:\n",
    "                continue\n",
    "            flag=True\n",
    "            n+=1\n",
    "            if no_conflict:\n",
    "                if is_hallucination:\n",
    "                    t+=1\n",
    "                else:\n",
    "                    f+=1\n",
    "            elif is_hallucination:\n",
    "                t22.append(ref1)\n",
    "            else:\n",
    "                f22.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # show the detection reference that matched zero or several dataset rows\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 127,
   "id": "0d7cae8e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "246 0.9919354838709677\n",
      "2 0.008064516129032258\n",
      "matched num= 496\n"
     ]
    }
   ],
   "source": [
    "# Score detections_3.jsonl against the labelled wice+0.1 dataset.\n",
    "# t / f: answers containing the no-conflict phrase on rows labelled 'true' (hallucination) / other rows (factual).\n",
    "# t3 / f3: dataset references whose answer lacks that phrase.\n",
    "# num is read from the notebook namespace; it is not defined in this cell.\n",
    "t=0\n",
    "f=0\n",
    "t3=[]\n",
    "f3=[]\n",
    "n=0\n",
    "# Parse the dataset once instead of re-opening and re-splitting it for every detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/wice+0.1/dataset_0.1.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # the marker occurred twice: re-join the last two pieces around it\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        ref1=ref1[:-8]\n",
    "        dataset.append((ref1,'true' in label))\n",
    "with open(\"llama_data/wice+0.1/detections_3.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        key=ref.strip()\n",
    "        key_parts=key.split(\".\")\n",
    "        key_head=key.split(\"\\\\\\\\\")[0]\n",
    "        uni=key.split('\\\\u')[-1][4:]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for ref1,is_hallucination in dataset:\n",
    "            cand=ref1.strip()\n",
    "            if key==cand:\n",
    "                matched=True\n",
    "            elif flag:\n",
    "                # fallback rules are only tried until this detection has its first match\n",
    "                matched=False\n",
    "            else:\n",
    "                cand_parts=cand.split(\".\")\n",
    "                matched=(\n",
    "                    # same second-to-last '.'-separated piece\n",
    "                    (len(key_parts)>2 and len(cand_parts)>2 and key_parts[-2]==cand_parts[-2])\n",
    "                    # same text before the first double backslash\n",
    "                    or key_head==cand.split(\"\\\\\\\\\")[0]\n",
    "                    # same tail after the last backslash-u marker, skipping the 4 characters that follow it\n",
    "                    or cand[-len(uni):]==uni\n",
    "                )\n",
    "            if not matched:\n",
    "                continue\n",
    "            flag=True\n",
    "            n+=1\n",
    "            if no_conflict:\n",
    "                if is_hallucination:\n",
    "                    t+=1\n",
    "                else:\n",
    "                    f+=1\n",
    "            elif is_hallucination:\n",
    "                t3.append(ref1)\n",
    "            else:\n",
    "                f3.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # show the detection reference that matched zero or several dataset rows\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 128,
   "id": "9f3301a5",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "240 0.967741935483871\n",
      "12 0.04838709677419355\n",
      "matched num= 496\n"
     ]
    }
   ],
   "source": [
    "# Score detections_3_1.jsonl against the labelled wice+0.1 dataset.\n",
    "# t / f: answers containing the no-conflict phrase on rows labelled 'true' (hallucination) / other rows (factual).\n",
    "# t33 / f33: dataset references whose answer lacks that phrase.\n",
    "# num is read from the notebook namespace; it is not defined in this cell.\n",
    "t=0\n",
    "f=0\n",
    "t33=[]\n",
    "f33=[]\n",
    "n=0\n",
    "# Parse the dataset once instead of re-opening and re-splitting it for every detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/wice+0.1/dataset_0.1.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # the marker occurred twice: re-join the last two pieces around it\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        ref1=ref1[:-8]\n",
    "        dataset.append((ref1,'true' in label))\n",
    "with open(\"llama_data/wice+0.1/detections_3_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        key=ref.strip()\n",
    "        key_parts=key.split(\".\")\n",
    "        key_head=key.split(\"\\\\\\\\\")[0]\n",
    "        uni=key.split('\\\\u')[-1][4:]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for ref1,is_hallucination in dataset:\n",
    "            cand=ref1.strip()\n",
    "            if key==cand:\n",
    "                matched=True\n",
    "            elif flag:\n",
    "                # fallback rules are only tried until this detection has its first match\n",
    "                matched=False\n",
    "            else:\n",
    "                cand_parts=cand.split(\".\")\n",
    "                matched=(\n",
    "                    # same second-to-last '.'-separated piece\n",
    "                    (len(key_parts)>2 and len(cand_parts)>2 and key_parts[-2]==cand_parts[-2])\n",
    "                    # same text before the first double backslash\n",
    "                    or key_head==cand.split(\"\\\\\\\\\")[0]\n",
    "                    # same tail after the last backslash-u marker, skipping the 4 characters that follow it\n",
    "                    or cand[-len(uni):]==uni\n",
    "                )\n",
    "            if not matched:\n",
    "                continue\n",
    "            flag=True\n",
    "            n+=1\n",
    "            if no_conflict:\n",
    "                if is_hallucination:\n",
    "                    t+=1\n",
    "                else:\n",
    "                    f+=1\n",
    "            elif is_hallucination:\n",
    "                t33.append(ref1)\n",
    "            else:\n",
    "                f33.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # show the detection reference that matched zero or several dataset rows\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 129,
   "id": "b7d7334c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "246 0.9919354838709677\n",
      "3 0.012096774193548387\n",
      "matched num= 496\n"
     ]
    }
   ],
   "source": [
    "# Score detections_4.jsonl against the labelled wice+0.1 dataset.\n",
    "# t / f: answers containing the no-conflict phrase on rows labelled 'true' (hallucination) / other rows (factual).\n",
    "# t4 / f4: dataset references whose answer lacks that phrase.\n",
    "# num is read from the notebook namespace; it is not defined in this cell.\n",
    "t=0\n",
    "f=0\n",
    "t4=[]\n",
    "f4=[]\n",
    "n=0\n",
    "# Parse the dataset once instead of re-opening and re-splitting it for every detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/wice+0.1/dataset_0.1.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # the marker occurred twice: re-join the last two pieces around it\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        ref1=ref1[:-8]\n",
    "        dataset.append((ref1,'true' in label))\n",
    "with open(\"llama_data/wice+0.1/detections_4.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        key=ref.strip()\n",
    "        key_parts=key.split(\".\")\n",
    "        key_head=key.split(\"\\\\\\\\\")[0]\n",
    "        uni=key.split('\\\\u')[-1][4:]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for ref1,is_hallucination in dataset:\n",
    "            cand=ref1.strip()\n",
    "            if key==cand:\n",
    "                matched=True\n",
    "            elif flag:\n",
    "                # fallback rules are only tried until this detection has its first match\n",
    "                matched=False\n",
    "            else:\n",
    "                cand_parts=cand.split(\".\")\n",
    "                matched=(\n",
    "                    # same second-to-last '.'-separated piece\n",
    "                    (len(key_parts)>2 and len(cand_parts)>2 and key_parts[-2]==cand_parts[-2])\n",
    "                    # same text before the first double backslash\n",
    "                    or key_head==cand.split(\"\\\\\\\\\")[0]\n",
    "                    # same tail after the last backslash-u marker, skipping the 4 characters that follow it\n",
    "                    or cand[-len(uni):]==uni\n",
    "                )\n",
    "            if not matched:\n",
    "                continue\n",
    "            flag=True\n",
    "            n+=1\n",
    "            if no_conflict:\n",
    "                if is_hallucination:\n",
    "                    t+=1\n",
    "                else:\n",
    "                    f+=1\n",
    "            elif is_hallucination:\n",
    "                t4.append(ref1)\n",
    "            else:\n",
    "                f4.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # show the detection reference that matched zero or several dataset rows\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 130,
   "id": "c284f2ab",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "241 0.9717741935483871\n",
      "4 0.016129032258064516\n",
      "matched num= 496\n"
     ]
    }
   ],
   "source": [
    "# Score detections_4_1.jsonl against the labelled wice+0.1 dataset.\n",
    "# t / f: answers containing the no-conflict phrase on rows labelled 'true' (hallucination) / other rows (factual).\n",
    "# t44 / f44: dataset references whose answer lacks that phrase.\n",
    "# num is read from the notebook namespace; it is not defined in this cell.\n",
    "t=0\n",
    "f=0\n",
    "t44=[]\n",
    "f44=[]\n",
    "n=0\n",
    "# Parse the dataset once instead of re-opening and re-splitting it for every detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/wice+0.1/dataset_0.1.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # the marker occurred twice: re-join the last two pieces around it\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        ref1=ref1[:-8]\n",
    "        dataset.append((ref1,'true' in label))\n",
    "with open(\"llama_data/wice+0.1/detections_4_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        key=ref.strip()\n",
    "        key_parts=key.split(\".\")\n",
    "        key_head=key.split(\"\\\\\\\\\")[0]\n",
    "        uni=key.split('\\\\u')[-1][4:]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for ref1,is_hallucination in dataset:\n",
    "            cand=ref1.strip()\n",
    "            if key==cand:\n",
    "                matched=True\n",
    "            elif flag:\n",
    "                # fallback rules are only tried until this detection has its first match\n",
    "                matched=False\n",
    "            else:\n",
    "                cand_parts=cand.split(\".\")\n",
    "                matched=(\n",
    "                    # same second-to-last '.'-separated piece\n",
    "                    (len(key_parts)>2 and len(cand_parts)>2 and key_parts[-2]==cand_parts[-2])\n",
    "                    # same text before the first double backslash\n",
    "                    or key_head==cand.split(\"\\\\\\\\\")[0]\n",
    "                    # same tail after the last backslash-u marker, skipping the 4 characters that follow it\n",
    "                    or cand[-len(uni):]==uni\n",
    "                )\n",
    "            if not matched:\n",
    "                continue\n",
    "            flag=True\n",
    "            n+=1\n",
    "            if no_conflict:\n",
    "                if is_hallucination:\n",
    "                    t+=1\n",
    "                else:\n",
    "                    f+=1\n",
    "            elif is_hallucination:\n",
    "                t44.append(ref1)\n",
    "            else:\n",
    "                f44.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # show the detection reference that matched zero or several dataset rows\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 131,
   "id": "88a845aa",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "244 0.9838709677419355\n",
      "4 0.016129032258064516\n",
      "matched num= 496\n"
     ]
    }
   ],
   "source": [
    "# Score detections_5.jsonl against the labelled wice+0.1 dataset.\n",
    "# t / f: answers containing the no-conflict phrase on rows labelled 'true' (hallucination) / other rows (factual).\n",
    "# t5 / f5: dataset references whose answer lacks that phrase.\n",
    "# num is read from the notebook namespace; it is not defined in this cell.\n",
    "t=0\n",
    "f=0\n",
    "t5=[]\n",
    "f5=[]\n",
    "n=0\n",
    "# Parse the dataset once instead of re-opening and re-splitting it for every detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/wice+0.1/dataset_0.1.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # the marker occurred twice: re-join the last two pieces around it\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        ref1=ref1[:-8]\n",
    "        dataset.append((ref1,'true' in label))\n",
    "with open(\"llama_data/wice+0.1/detections_5.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        key=ref.strip()\n",
    "        key_parts=key.split(\".\")\n",
    "        key_head=key.split(\"\\\\\\\\\")[0]\n",
    "        uni=key.split('\\\\u')[-1][4:]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for ref1,is_hallucination in dataset:\n",
    "            cand=ref1.strip()\n",
    "            if key==cand:\n",
    "                matched=True\n",
    "            elif flag:\n",
    "                # fallback rules are only tried until this detection has its first match\n",
    "                matched=False\n",
    "            else:\n",
    "                cand_parts=cand.split(\".\")\n",
    "                matched=(\n",
    "                    # same second-to-last '.'-separated piece\n",
    "                    (len(key_parts)>2 and len(cand_parts)>2 and key_parts[-2]==cand_parts[-2])\n",
    "                    # same text before the first double backslash\n",
    "                    or key_head==cand.split(\"\\\\\\\\\")[0]\n",
    "                    # same tail after the last backslash-u marker, skipping the 4 characters that follow it\n",
    "                    or cand[-len(uni):]==uni\n",
    "                )\n",
    "            if not matched:\n",
    "                continue\n",
    "            flag=True\n",
    "            n+=1\n",
    "            if no_conflict:\n",
    "                if is_hallucination:\n",
    "                    t+=1\n",
    "                else:\n",
    "                    f+=1\n",
    "            elif is_hallucination:\n",
    "                t5.append(ref1)\n",
    "            else:\n",
    "                f5.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # show the detection reference that matched zero or several dataset rows\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "id": "bf3f9e40",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "236 0.9516129032258065\n",
      "10 0.04032258064516129\n",
      "matched num= 496\n"
     ]
    }
   ],
   "source": [
    "# Score llama_data/wice+0.1/detections_5_1.jsonl against the wice+0.1 dataset.\n",
    "# t / f count hallucinated / factual rows whose answer reports no conflicting parts;\n",
    "# t55 / f55 collect the references of the rows that were flagged instead.\n",
    "# Relies on num from an earlier cell -- presumably the per-class row count (TODO confirm).\n",
    "t=0\n",
    "f=0\n",
    "t55=[]\n",
    "f55=[]\n",
    "n=0\n",
    "\n",
    "# Parse the dataset once; previously it was re-opened and re-parsed for every detection line.\n",
    "dataset_rows=[]\n",
    "with open(\"llama_data/wice+0.1/dataset_0.1.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        pieces=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(pieces)==3:\n",
    "            # Marker occurs twice: keep everything after its first occurrence.\n",
    "            ref1=pieces[1]+\"Reference: \"+pieces[2]\n",
    "        else:\n",
    "            ref1=pieces[-1]\n",
    "        # Drop the fixed 8-character trailer (presumably closing markup -- TODO confirm).\n",
    "        ref1=ref1[:-8]\n",
    "        dataset_rows.append((label,ref1))\n",
    "\n",
    "with open(\"llama_data/wice+0.1/detections_5_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        target=ref.strip()\n",
    "        sentences=target.split(\".\")\n",
    "        head=target.split(\"\\\\\\\\\")[0]\n",
    "        tail=target.split('\\\\u')[-1][4:]\n",
    "        for label,ref1 in dataset_rows:\n",
    "            candidate=ref1.strip()\n",
    "            # An exact match always counts; the looser fallbacks only apply\n",
    "            # while this detection has not been matched to any row yet.\n",
    "            matched=target==candidate\n",
    "            if not matched and not flag:\n",
    "                cand_sentences=candidate.split(\".\")\n",
    "                matched=(\n",
    "                    # same second-to-last '.'-separated segment\n",
    "                    (len(sentences)>2 and len(cand_sentences)>2 and sentences[-2]==cand_sentences[-2])\n",
    "                    # same text before the first escaped backslash\n",
    "                    or head==candidate.split(\"\\\\\\\\\")[0]\n",
    "                    # same text after the last \\uXXXX escape\n",
    "                    or candidate[-len(tail):]==tail\n",
    "                )\n",
    "            if not matched:\n",
    "                continue\n",
    "            flag=True\n",
    "            n+=1\n",
    "            if 'true' in label: # hallucination\n",
    "                if no_conflict:\n",
    "                    t+=1\n",
    "                else:\n",
    "                    t55.append(ref1)\n",
    "            else: # factual\n",
    "                if no_conflict:\n",
    "                    f+=1\n",
    "                else:\n",
    "                    f55.append(ref1)\n",
    "        # Each detection should match exactly one dataset row; report the offending\n",
    "        # detection (ref1 would only ever show the last dataset row).\n",
    "        if n-pre!=1:\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 133,
   "id": "c9a2925d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "188 0.7580645161290323\n",
      "75 0.3024193548387097\n",
      "matched num= 496\n"
     ]
    }
   ],
   "source": [
    "# Score llama_data/wice+0.1/combine/detections_6_2.jsonl against the wice+0.1 dataset.\n",
    "# t / f count hallucinated / factual rows whose answer reports no conflicting parts;\n",
    "# t6 / f6 collect the references of the rows that were flagged instead.\n",
    "# Relies on num from an earlier cell -- presumably the per-class row count (TODO confirm).\n",
    "t=0\n",
    "f=0\n",
    "t6=[]\n",
    "f6=[]\n",
    "n=0\n",
    "\n",
    "# Parse the dataset once; previously it was re-opened and re-parsed for every detection line.\n",
    "dataset_rows=[]\n",
    "with open(\"llama_data/wice+0.1/dataset_0.1.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        pieces=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(pieces)==3:\n",
    "            # Marker occurs twice: keep everything after its first occurrence.\n",
    "            ref1=pieces[1]+\"Reference: \"+pieces[2]\n",
    "        else:\n",
    "            ref1=pieces[-1]\n",
    "        # Drop the fixed 8-character trailer (presumably closing markup -- TODO confirm).\n",
    "        ref1=ref1[:-8]\n",
    "        dataset_rows.append((label,ref1))\n",
    "\n",
    "with open(\"llama_data/wice+0.1/combine/detections_6_2.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        target=ref.strip()\n",
    "        sentences=target.split(\".\")\n",
    "        head=target.split(\"\\\\\\\\\")[0]\n",
    "        tail=target.split('\\\\u')[-1][4:]\n",
    "        for label,ref1 in dataset_rows:\n",
    "            candidate=ref1.strip()\n",
    "            # An exact match always counts; the looser fallbacks only apply\n",
    "            # while this detection has not been matched to any row yet.\n",
    "            matched=target==candidate\n",
    "            if not matched and not flag:\n",
    "                cand_sentences=candidate.split(\".\")\n",
    "                matched=(\n",
    "                    # same second-to-last '.'-separated segment\n",
    "                    (len(sentences)>2 and len(cand_sentences)>2 and sentences[-2]==cand_sentences[-2])\n",
    "                    # same text before the first escaped backslash\n",
    "                    or head==candidate.split(\"\\\\\\\\\")[0]\n",
    "                    # same text after the last \\uXXXX escape\n",
    "                    or candidate[-len(tail):]==tail\n",
    "                )\n",
    "            if not matched:\n",
    "                continue\n",
    "            flag=True\n",
    "            n+=1\n",
    "            if 'true' in label: # hallucination\n",
    "                if no_conflict:\n",
    "                    t+=1\n",
    "                else:\n",
    "                    t6.append(ref1)\n",
    "            else: # factual\n",
    "                if no_conflict:\n",
    "                    f+=1\n",
    "                else:\n",
    "                    f6.append(ref1)\n",
    "        # Each detection should match exactly one dataset row; report the offending\n",
    "        # detection (ref1 would only ever show the last dataset row).\n",
    "        if n-pre!=1:\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 340,
   "id": "58f49295",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n",
      "xxx\n"
     ]
    }
   ],
   "source": [
    "# Rebuild detections_6_2.jsonl in dataset order: for every dataset row, copy the\n",
    "# matching detection line(s) from one of two detection files.\n",
    "# Rows whose reference is in f666 are taken from detections_6.jsonl, all others from\n",
    "# combine/detections_6.jsonl. f666 must already exist (it is not defined in this cell).\n",
    "# NOTE(review): the output goes to the working directory, not llama_data/wice+0.1/combine/.\n",
    "# The unused label / answer / pre=n assignments were dropped; pre=n needed a stale n\n",
    "# from another cell and failed on a fresh kernel.\n",
    "with open('detections_6_2.jsonl','w') as twofile:\n",
    "    with open(\"llama_data/wice+0.1/dataset_0.1.jsonl\") as file1:\n",
    "        for line1 in file1:\n",
    "            ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "            if len(ref1)==3:\n",
    "                # Marker occurs twice: keep everything after its first occurrence.\n",
    "                ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "            else:\n",
    "                ref1=ref1[-1]\n",
    "            # Drop the fixed 8-character trailer (presumably closing markup -- TODO confirm).\n",
    "            ref1=ref1[:-8]\n",
    "            if ref1 not in f666:\n",
    "                source_path=\"llama_data/wice+0.1/combine/detections_6.jsonl\"\n",
    "            else:\n",
    "                print('xxx')\n",
    "                source_path='llama_data/wice+0.1/detections_6.jsonl'\n",
    "            candidate=ref1.strip()\n",
    "            flag=False\n",
    "            with open(source_path) as file:\n",
    "                for line in file:\n",
    "                    ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "                    target=ref.strip()\n",
    "                    # An exact match is always copied; the looser fallbacks only apply\n",
    "                    # while nothing has been copied for this dataset row yet.\n",
    "                    matched=target==candidate\n",
    "                    if not matched and not flag:\n",
    "                        tail=target.split('\\\\u')[-1][4:]\n",
    "                        matched=(\n",
    "                            # same text before the first escaped backslash\n",
    "                            target.split(\"\\\\\\\\\")[0]==candidate.split(\"\\\\\\\\\")[0]\n",
    "                            # same text after the last \\uXXXX escape\n",
    "                            or candidate[-len(tail):]==tail\n",
    "                        )\n",
    "                    if matched:\n",
    "                        flag=True\n",
    "                        twofile.write(line)\n",
    "            if flag==False:\n",
    "                print('one error')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 134,
   "id": "864cad15",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "240 0.967741935483871\n",
      "6 0.024193548387096774\n",
      "matched num= 496\n"
     ]
    }
   ],
   "source": [
    "# Score llama_data/wice+0.1/detections_6_1.jsonl against the wice+0.1 dataset.\n",
    "# t / f count hallucinated / factual rows whose answer reports no conflicting parts;\n",
    "# t66 / f66 collect the references of the rows that were flagged instead.\n",
    "# Relies on num from an earlier cell -- presumably the per-class row count (TODO confirm).\n",
    "t=0\n",
    "f=0\n",
    "t66=[]\n",
    "f66=[]\n",
    "n=0\n",
    "\n",
    "# Parse the dataset once; previously it was re-opened and re-parsed for every detection line.\n",
    "dataset_rows=[]\n",
    "with open(\"llama_data/wice+0.1/dataset_0.1.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        pieces=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(pieces)==3:\n",
    "            # Marker occurs twice: keep everything after its first occurrence.\n",
    "            ref1=pieces[1]+\"Reference: \"+pieces[2]\n",
    "        else:\n",
    "            ref1=pieces[-1]\n",
    "        # Drop the fixed 8-character trailer (presumably closing markup -- TODO confirm).\n",
    "        ref1=ref1[:-8]\n",
    "        dataset_rows.append((label,ref1))\n",
    "\n",
    "with open(\"llama_data/wice+0.1/detections_6_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        target=ref.strip()\n",
    "        sentences=target.split(\".\")\n",
    "        head=target.split(\"\\\\\\\\\")[0]\n",
    "        tail=target.split('\\\\u')[-1][4:]\n",
    "        for label,ref1 in dataset_rows:\n",
    "            candidate=ref1.strip()\n",
    "            # An exact match always counts; the looser fallbacks only apply\n",
    "            # while this detection has not been matched to any row yet.\n",
    "            matched=target==candidate\n",
    "            if not matched and not flag:\n",
    "                cand_sentences=candidate.split(\".\")\n",
    "                matched=(\n",
    "                    # same second-to-last '.'-separated segment\n",
    "                    (len(sentences)>2 and len(cand_sentences)>2 and sentences[-2]==cand_sentences[-2])\n",
    "                    # same text before the first escaped backslash\n",
    "                    or head==candidate.split(\"\\\\\\\\\")[0]\n",
    "                    # same text after the last \\uXXXX escape\n",
    "                    or candidate[-len(tail):]==tail\n",
    "                )\n",
    "            if not matched:\n",
    "                continue\n",
    "            flag=True\n",
    "            n+=1\n",
    "            if 'true' in label: # hallucination\n",
    "                if no_conflict:\n",
    "                    t+=1\n",
    "                else:\n",
    "                    t66.append(ref1)\n",
    "            else: # factual\n",
    "                if no_conflict:\n",
    "                    f+=1\n",
    "                else:\n",
    "                    f66.append(ref1)\n",
    "        # Each detection should match exactly one dataset row; report it otherwise.\n",
    "        if n-pre!=1:\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 135,
   "id": "b5586d73",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "239 0.9637096774193549\n",
      "11 0.04435483870967742\n",
      "matched num= 496\n"
     ]
    }
   ],
   "source": [
    "# Score llama_data/wice+0.1/detections_7.jsonl against the wice+0.1 dataset.\n",
    "# t / f count hallucinated / factual rows whose answer reports no conflicting parts;\n",
    "# t7 / f7 collect the references of the rows that were flagged instead.\n",
    "# Relies on num from an earlier cell -- presumably the per-class row count (TODO confirm).\n",
    "t=0\n",
    "f=0\n",
    "t7=[]\n",
    "f7=[]\n",
    "n=0\n",
    "\n",
    "# Parse the dataset once; previously it was re-opened and re-parsed for every detection line.\n",
    "dataset_rows=[]\n",
    "with open(\"llama_data/wice+0.1/dataset_0.1.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        pieces=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(pieces)==3:\n",
    "            # Marker occurs twice: keep everything after its first occurrence.\n",
    "            ref1=pieces[1]+\"Reference: \"+pieces[2]\n",
    "        else:\n",
    "            ref1=pieces[-1]\n",
    "        # Drop the fixed 8-character trailer (presumably closing markup -- TODO confirm).\n",
    "        ref1=ref1[:-8]\n",
    "        dataset_rows.append((label,ref1))\n",
    "\n",
    "with open(\"llama_data/wice+0.1/detections_7.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        target=ref.strip()\n",
    "        sentences=target.split(\".\")\n",
    "        head=target.split(\"\\\\\\\\\")[0]\n",
    "        tail=target.split('\\\\u')[-1][4:]\n",
    "        for label,ref1 in dataset_rows:\n",
    "            candidate=ref1.strip()\n",
    "            # An exact match always counts; the looser fallbacks only apply\n",
    "            # while this detection has not been matched to any row yet.\n",
    "            matched=target==candidate\n",
    "            if not matched and not flag:\n",
    "                cand_sentences=candidate.split(\".\")\n",
    "                matched=(\n",
    "                    # same second-to-last '.'-separated segment\n",
    "                    (len(sentences)>2 and len(cand_sentences)>2 and sentences[-2]==cand_sentences[-2])\n",
    "                    # same text before the first escaped backslash\n",
    "                    or head==candidate.split(\"\\\\\\\\\")[0]\n",
    "                    # same text after the last \\uXXXX escape\n",
    "                    or candidate[-len(tail):]==tail\n",
    "                )\n",
    "            if not matched:\n",
    "                continue\n",
    "            flag=True\n",
    "            n+=1\n",
    "            if 'true' in label: # hallucination\n",
    "                if no_conflict:\n",
    "                    t+=1\n",
    "                else:\n",
    "                    t7.append(ref1)\n",
    "            else: # factual\n",
    "                if no_conflict:\n",
    "                    f+=1\n",
    "                else:\n",
    "                    f7.append(ref1)\n",
    "        # Each detection should match exactly one dataset row; report the offending\n",
    "        # detection (ref1 would only ever show the last dataset row).\n",
    "        if n-pre!=1:\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 140,
   "id": "704d872a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 12 12 12 5 6 7\n",
      "0.5141129032258065\n",
      "0.5880341880341879\n",
      "5 12 12 12 5 6 7\n",
      "0.5443548387096775\n",
      "0.6089965397923874\n"
     ]
    }
   ],
   "source": [
    "# Scan detector runs (or unions of COMBO_SIZE runs) and print every combination whose\n",
    "# accuracy is at least the best seen so far, followed by its accuracy and F1.\n",
    "# tlist / flist hold, per run, the flagged hallucinated / factual references collected\n",
    "# by the scoring cells; num comes from an earlier cell (presumably rows per class).\n",
    "from itertools import combinations\n",
    "\n",
    "tlist=[t1,t2,t3,t4,t5,t6,t7,t11,t22,t33,t44,t55,t66]\n",
    "flist=[f1,f2,f3,f4,f5,f6,f7,f11,f22,f33,f44,f55,f66]\n",
    "#      0   1  2  3  4  5  6  7   8   9   10  11  12\n",
    "\n",
    "# Number of runs whose flagged sets are unioned. 1 reproduces the single-run scan;\n",
    "# larger values replace the formerly commented-out nested loops.\n",
    "COMBO_SIZE=1\n",
    "\n",
    "minV=0  # best accuracy seen so far (a running maximum despite the name)\n",
    "for combo in combinations(range(len(tlist)),COMBO_SIZE):\n",
    "    # Distinct flagged hallucinated references across the chosen runs.\n",
    "    a=list(set().union(*(tlist[idx] for idx in combo)))\n",
    "    aa=len(a)\n",
    "    # Distinct flagged factual references across the chosen runs.\n",
    "    a=list(set().union(*(flist[idx] for idx in combo)))\n",
    "    bb=len(a)\n",
    "    TN=(num-bb)/(num*2)\n",
    "    TP=aa/(num*2)\n",
    "    FN=(num-aa)/(num*2)\n",
    "    FP=bb/(num*2)\n",
    "    F1=2*TP/(2*TP+FP+FN)\n",
    "    if TN+TP>=minV:\n",
    "        # Print only the indices actually used; the old print also emitted stale\n",
    "        # jj/ii/i/j/k/m values left over from earlier runs of the nested loops.\n",
    "        print(*combo)\n",
    "        print(TN+TP)\n",
    "        print(F1)\n",
    "        minV=TN+TP"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 347,
   "id": "5c5f648b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "496\n",
      "11.084677419354838\n",
      "11.116935483870968\n"
     ]
    }
   ],
   "source": [
    "# For every dataset row, count how many detector runs flagged its reference, then print\n",
    "# the row total and the mean count for factual rows (fnum) and hallucinated rows (tnum).\n",
    "# Uses tlist / flist from the combination cell. Unlike before, this no longer overwrites\n",
    "# the globals num, t and f that the scoring and metric cells depend on.\n",
    "fnum=[]\n",
    "tnum=[]\n",
    "total_rows=0\n",
    "with open('llama_data/wice+0.1/dataset_0.1.jsonl') as file1: \n",
    "    for line1 in file1:\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # Marker occurs twice: keep everything after its first occurrence.\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        # Drop the fixed 8-character trailer (presumably closing markup -- TODO confirm).\n",
    "        ref1=ref1[:-8]\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        total_rows+=1\n",
    "        if 'true' in label: # hallucination (same convention as the scoring cells)\n",
    "            tnum.append(sum(1 for flagged in tlist if ref1 in flagged))\n",
    "        else: # factual\n",
    "            fnum.append(sum(1 for flagged in flist if ref1 in flagged))\n",
    "print(total_rows)\n",
    "print(np.mean(fnum))\n",
    "print(np.mean(tnum))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 162,
   "id": "3d41a3eb",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "172\n",
      "0.7577092511013216\n"
     ]
    }
   ],
   "source": [
    "# Union of the references collected in t4 and t55; aa is the number of distinct ones.\n",
    "res=set(t4+t55)\n",
    "a=list(res)\n",
    "aa=len(a)\n",
    "print(aa)\n",
    "print(aa/num)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 163,
   "id": "436a16d3",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "154\n",
      "0.32158590308370044\n"
     ]
    }
   ],
   "source": [
    "# Union of the references collected in f4 and f55; bb is the number of distinct ones.\n",
    "res=set(f4+f55)\n",
    "a=list(res)\n",
    "bb=len(a)\n",
    "print(bb)\n",
    "print((num-bb)/num)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 164,
   "id": "edef845e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "FN 0.1211453744493392\n",
      "FP 0.3392070484581498\n",
      "TN 0.16079295154185022\n",
      "TP 0.3788546255506608\n"
     ]
    }
   ],
   "source": [
    "# Confusion-matrix entries as fractions of all_num = 2*num rows.\n",
    "# aa / bb come from the preceding cells; they are assumed to be the numbers of distinct\n",
    "# hallucinated / factual references that were flagged (TODO confirm).\n",
    "all_num=num*2\n",
    "FN=(num-aa)/all_num  # hallucinated rows that were not flagged\n",
    "FP=bb/all_num        # factual rows wrongly flagged as hallucination\n",
    "TN=(num-bb)/all_num  # factual rows correctly left unflagged\n",
    "TP=aa/all_num        # hallucinated rows that were flagged\n",
    "for tag,value in (('FN',FN),('FP',FP),('TN',TN),('TP',TP)):\n",
    "    print(tag,value)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 165,
   "id": "fc25325c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.539647577092511\n",
      "0.5276073619631901\n",
      "0.7577092511013216\n",
      "0.6220614828209765\n"
     ]
    }
   ],
   "source": [
    "# Summary metrics derived from the confusion-matrix fractions TP / FP / TN / FN.\n",
    "Accuracy=TN+TP\n",
    "Precision=TP/(TP+FP)\n",
    "Recall=TP/(TP+FN)\n",
    "F1=2*TP/(2*TP+FP+FN)\n",
    "# Output order: accuracy, precision, recall, F1.\n",
    "for score in (Accuracy,Precision,Recall,F1):\n",
    "    print(score)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4bedb0fc",
   "metadata": {},
   "source": [
    "# wice+0.5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 103,
   "id": "8ddbacfe",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sample count used as the denominator for the rates printed by the cells below.\n",
    "# NOTE(review): the matched total reported below is 2*num, so this looks like the per-class size - confirm.\n",
    "num=243"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 104,
   "id": "e433c585",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "170 0.6995884773662552\n",
      "64 0.26337448559670784\n",
      "matched num= 486\n"
     ]
    }
   ],
   "source": [
    "# Score detections_1.jsonl against the labelled wice+0.5 dataset.\n",
    "# t / f count hallucinated / factual samples whose answer contains the no-conflict phrase;\n",
    "# t1 / f1 collect the dataset references of hallucinated / factual samples whose answer lacks it.\n",
    "NO_CONFLICT=\"here are no conflicting parts\"\n",
    "t=0\n",
    "f=0\n",
    "t1=[]\n",
    "f1=[]\n",
    "n=0\n",
    "# Parse the dataset once up front rather than once per detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/wice+0.5/dataset_0.5.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        parts=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        # Three pieces means 'Reference: ' occurs twice; keep everything after the first occurrence.\n",
    "        if len(parts)==3:\n",
    "            ref1=parts[1]+\"Reference: \"+parts[2]\n",
    "        else:\n",
    "            ref1=parts[-1]\n",
    "        # Drop the 8 trailing characters (NOTE(review): presumably a fixed trailer after the reference text - confirm).\n",
    "        # A label containing 'true' marks a hallucinated sample.\n",
    "        dataset.append((ref1[:-8],'true' in label))\n",
    "with open(\"llama_data/wice+0.5/detections_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        cleared=NO_CONFLICT in answer\n",
    "        key=ref.strip()\n",
    "        key_sentence=key.split(\".\")[0].strip()\n",
    "        key_head=key.split(\"\\\\\\\\\")[0]\n",
    "        # Tail of the reference after the last backslash-u marker, skipping four more characters.\n",
    "        uni=key.split('\\\\u')[-1][4:]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for ref1,hallucinated in dataset:\n",
    "            cand=ref1.strip()\n",
    "            # An exact match always counts; the looser fallbacks are only tried until the first match.\n",
    "            hit=key==cand\n",
    "            if not hit and not flag:\n",
    "                # Fallbacks in order: same text before the first '.', same text before the first\n",
    "                # double backslash, or the last len(uni) characters of the dataset reference equal uni.\n",
    "                hit=(\n",
    "                    key_sentence==cand.split(\".\")[0].strip()\n",
    "                    or key_head==cand.split(\"\\\\\\\\\")[0]\n",
    "                    or cand[-len(uni):]==uni\n",
    "                )\n",
    "            if not hit:\n",
    "                continue\n",
    "            flag=True\n",
    "            n+=1\n",
    "            if cleared:\n",
    "                if hallucinated:\n",
    "                    t+=1\n",
    "                else:\n",
    "                    f+=1\n",
    "            elif hallucinated:\n",
    "                t1.append(ref1)\n",
    "            else:\n",
    "                f1.append(ref1)\n",
    "        # Every detection line should match exactly one dataset entry; show the ones that do not.\n",
    "        if n-pre!=1:\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 105,
   "id": "6f43f560",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "165 0.6790123456790124\n",
      "67 0.2757201646090535\n",
      "matched num= 486\n"
     ]
    }
   ],
   "source": [
    "# Score detections_1_1.jsonl against the labelled wice+0.5 dataset.\n",
    "# t / f count hallucinated / factual samples whose answer contains the no-conflict phrase;\n",
    "# t11 / f11 collect the dataset references of hallucinated / factual samples whose answer lacks it.\n",
    "NO_CONFLICT=\"here are no conflicting parts\"\n",
    "t=0\n",
    "f=0\n",
    "t11=[]\n",
    "f11=[]\n",
    "n=0\n",
    "# Parse the dataset once up front rather than once per detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/wice+0.5/dataset_0.5.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        parts=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        # Three pieces means 'Reference: ' occurs twice; keep everything after the first occurrence.\n",
    "        if len(parts)==3:\n",
    "            ref1=parts[1]+\"Reference: \"+parts[2]\n",
    "        else:\n",
    "            ref1=parts[-1]\n",
    "        # Drop the 8 trailing characters (NOTE(review): presumably a fixed trailer after the reference text - confirm).\n",
    "        # A label containing 'true' marks a hallucinated sample.\n",
    "        dataset.append((ref1[:-8],'true' in label))\n",
    "with open(\"llama_data/wice+0.5/detections_1_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        cleared=NO_CONFLICT in answer\n",
    "        key=ref.strip()\n",
    "        key_sentence=key.split(\".\")[0].strip()\n",
    "        key_head=key.split(\"\\\\\\\\\")[0]\n",
    "        # Tail of the reference after the last backslash-u marker, skipping four more characters.\n",
    "        uni=key.split('\\\\u')[-1][4:]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for ref1,hallucinated in dataset:\n",
    "            cand=ref1.strip()\n",
    "            # An exact match always counts; the looser fallbacks are only tried until the first match.\n",
    "            hit=key==cand\n",
    "            if not hit and not flag:\n",
    "                # Fallbacks in order: same text before the first '.', same text before the first\n",
    "                # double backslash, or the last len(uni) characters of the dataset reference equal uni.\n",
    "                hit=(\n",
    "                    key_sentence==cand.split(\".\")[0].strip()\n",
    "                    or key_head==cand.split(\"\\\\\\\\\")[0]\n",
    "                    or cand[-len(uni):]==uni\n",
    "                )\n",
    "            if not hit:\n",
    "                continue\n",
    "            flag=True\n",
    "            n+=1\n",
    "            if cleared:\n",
    "                if hallucinated:\n",
    "                    t+=1\n",
    "                else:\n",
    "                    f+=1\n",
    "            elif hallucinated:\n",
    "                t11.append(ref1)\n",
    "            else:\n",
    "                f11.append(ref1)\n",
    "        # Every detection line should match exactly one dataset entry; show the detection\n",
    "        # reference (ref) of the ones that do not, since ref1 is only the last dataset entry here.\n",
    "        if n-pre!=1:\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "id": "ffe0a491",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "165 0.6790123456790124\n",
      "75 0.30864197530864196\n",
      "matched num= 486\n"
     ]
    }
   ],
   "source": [
    "# Score detections_2.jsonl against the labelled wice+0.5 dataset.\n",
    "# t / f count hallucinated / factual samples whose answer contains the no-conflict phrase;\n",
    "# t2 / f2 collect the dataset references of hallucinated / factual samples whose answer lacks it.\n",
    "NO_CONFLICT=\"here are no conflicting parts\"\n",
    "t=0\n",
    "f=0\n",
    "t2=[]\n",
    "f2=[]\n",
    "n=0\n",
    "# Parse the dataset once up front rather than once per detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/wice+0.5/dataset_0.5.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        parts=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        # Three pieces means 'Reference: ' occurs twice; keep everything after the first occurrence.\n",
    "        if len(parts)==3:\n",
    "            ref1=parts[1]+\"Reference: \"+parts[2]\n",
    "        else:\n",
    "            ref1=parts[-1]\n",
    "        # Drop the 8 trailing characters (NOTE(review): presumably a fixed trailer after the reference text - confirm).\n",
    "        # A label containing 'true' marks a hallucinated sample.\n",
    "        dataset.append((ref1[:-8],'true' in label))\n",
    "with open(\"llama_data/wice+0.5/detections_2.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        cleared=NO_CONFLICT in answer\n",
    "        key=ref.strip()\n",
    "        key_sentence=key.split(\".\")[0].strip()\n",
    "        key_head=key.split(\"\\\\\\\\\")[0]\n",
    "        # Tail of the reference after the last backslash-u marker, skipping four more characters.\n",
    "        uni=key.split('\\\\u')[-1][4:]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for ref1,hallucinated in dataset:\n",
    "            cand=ref1.strip()\n",
    "            # An exact match always counts; the looser fallbacks are only tried until the first match.\n",
    "            hit=key==cand\n",
    "            if not hit and not flag:\n",
    "                # Fallbacks in order: same text before the first '.', same text before the first\n",
    "                # double backslash, or the last len(uni) characters of the dataset reference equal uni.\n",
    "                hit=(\n",
    "                    key_sentence==cand.split(\".\")[0].strip()\n",
    "                    or key_head==cand.split(\"\\\\\\\\\")[0]\n",
    "                    or cand[-len(uni):]==uni\n",
    "                )\n",
    "            if not hit:\n",
    "                continue\n",
    "            flag=True\n",
    "            n+=1\n",
    "            if cleared:\n",
    "                if hallucinated:\n",
    "                    t+=1\n",
    "                else:\n",
    "                    f+=1\n",
    "            elif hallucinated:\n",
    "                t2.append(ref1)\n",
    "            else:\n",
    "                f2.append(ref1)\n",
    "        # Every detection line should match exactly one dataset entry; show the detection\n",
    "        # reference (ref) of the ones that do not, since ref1 is only the last dataset entry here.\n",
    "        if n-pre!=1:\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "id": "ea3781f7",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "166 0.6831275720164609\n",
      "76 0.31275720164609055\n",
      "matched num= 486\n"
     ]
    }
   ],
   "source": [
    "# Score detections_2_1.jsonl against the labelled wice+0.5 dataset.\n",
    "# t / f count hallucinated / factual samples whose answer contains the no-conflict phrase;\n",
    "# t22 / f22 collect the dataset references of hallucinated / factual samples whose answer lacks it.\n",
    "NO_CONFLICT=\"here are no conflicting parts\"\n",
    "t=0\n",
    "f=0\n",
    "t22=[]\n",
    "f22=[]\n",
    "n=0\n",
    "# Parse the dataset once up front rather than once per detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/wice+0.5/dataset_0.5.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        parts=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        # Three pieces means 'Reference: ' occurs twice; keep everything after the first occurrence.\n",
    "        if len(parts)==3:\n",
    "            ref1=parts[1]+\"Reference: \"+parts[2]\n",
    "        else:\n",
    "            ref1=parts[-1]\n",
    "        # Drop the 8 trailing characters (NOTE(review): presumably a fixed trailer after the reference text - confirm).\n",
    "        # A label containing 'true' marks a hallucinated sample.\n",
    "        dataset.append((ref1[:-8],'true' in label))\n",
    "with open(\"llama_data/wice+0.5/detections_2_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        cleared=NO_CONFLICT in answer\n",
    "        key=ref.strip()\n",
    "        key_sentence=key.split(\".\")[0].strip()\n",
    "        key_head=key.split(\"\\\\\\\\\")[0]\n",
    "        # Tail of the reference after the last backslash-u marker, skipping four more characters.\n",
    "        uni=key.split('\\\\u')[-1][4:]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for ref1,hallucinated in dataset:\n",
    "            cand=ref1.strip()\n",
    "            # An exact match always counts; the looser fallbacks are only tried until the first match.\n",
    "            hit=key==cand\n",
    "            if not hit and not flag:\n",
    "                # Fallbacks in order: same text before the first '.', same text before the first\n",
    "                # double backslash, or the last len(uni) characters of the dataset reference equal uni.\n",
    "                hit=(\n",
    "                    key_sentence==cand.split(\".\")[0].strip()\n",
    "                    or key_head==cand.split(\"\\\\\\\\\")[0]\n",
    "                    or cand[-len(uni):]==uni\n",
    "                )\n",
    "            if not hit:\n",
    "                continue\n",
    "            flag=True\n",
    "            n+=1\n",
    "            if cleared:\n",
    "                if hallucinated:\n",
    "                    t+=1\n",
    "                else:\n",
    "                    f+=1\n",
    "            elif hallucinated:\n",
    "                t22.append(ref1)\n",
    "            else:\n",
    "                f22.append(ref1)\n",
    "        # Every detection line should match exactly one dataset entry; show the detection\n",
    "        # reference (ref) of the ones that do not, since ref1 is only the last dataset entry here.\n",
    "        if n-pre!=1:\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 108,
   "id": "7c590629",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "162 0.6666666666666666\n",
      "69 0.2839506172839506\n",
      "matched num= 486\n"
     ]
    }
   ],
   "source": [
    "# Score detections_3.jsonl against the labelled wice+0.5 dataset.\n",
    "# t / f count hallucinated / factual samples whose answer contains the no-conflict phrase;\n",
    "# t3 / f3 collect the dataset references of hallucinated / factual samples whose answer lacks it.\n",
    "NO_CONFLICT=\"here are no conflicting parts\"\n",
    "t=0\n",
    "f=0\n",
    "t3=[]\n",
    "f3=[]\n",
    "n=0\n",
    "# Parse the dataset once up front rather than once per detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/wice+0.5/dataset_0.5.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        parts=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        # Three pieces means 'Reference: ' occurs twice; keep everything after the first occurrence.\n",
    "        if len(parts)==3:\n",
    "            ref1=parts[1]+\"Reference: \"+parts[2]\n",
    "        else:\n",
    "            ref1=parts[-1]\n",
    "        # Drop the 8 trailing characters (NOTE(review): presumably a fixed trailer after the reference text - confirm).\n",
    "        # A label containing 'true' marks a hallucinated sample.\n",
    "        dataset.append((ref1[:-8],'true' in label))\n",
    "with open(\"llama_data/wice+0.5/detections_3.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        cleared=NO_CONFLICT in answer\n",
    "        key=ref.strip()\n",
    "        key_sentence=key.split(\".\")[0].strip()\n",
    "        key_head=key.split(\"\\\\\\\\\")[0]\n",
    "        # Tail of the reference after the last backslash-u marker, skipping four more characters.\n",
    "        uni=key.split('\\\\u')[-1][4:]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for ref1,hallucinated in dataset:\n",
    "            cand=ref1.strip()\n",
    "            # An exact match always counts; the looser fallbacks are only tried until the first match.\n",
    "            hit=key==cand\n",
    "            if not hit and not flag:\n",
    "                # Fallbacks in order: same text before the first '.', same text before the first\n",
    "                # double backslash, or the last len(uni) characters of the dataset reference equal uni.\n",
    "                hit=(\n",
    "                    key_sentence==cand.split(\".\")[0].strip()\n",
    "                    or key_head==cand.split(\"\\\\\\\\\")[0]\n",
    "                    or cand[-len(uni):]==uni\n",
    "                )\n",
    "            if not hit:\n",
    "                continue\n",
    "            flag=True\n",
    "            n+=1\n",
    "            if cleared:\n",
    "                if hallucinated:\n",
    "                    t+=1\n",
    "                else:\n",
    "                    f+=1\n",
    "            elif hallucinated:\n",
    "                t3.append(ref1)\n",
    "            else:\n",
    "                f3.append(ref1)\n",
    "        # Every detection line should match exactly one dataset entry; show the detection\n",
    "        # reference (ref) of the ones that do not, since ref1 is only the last dataset entry here.\n",
    "        if n-pre!=1:\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "id": "226ba7ce",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "181 0.7448559670781894\n",
      "63 0.25925925925925924\n",
      "matched num= 486\n"
     ]
    }
   ],
   "source": [
    "# Score detections_3_1.jsonl against the labelled wice+0.5 dataset.\n",
    "# t / f count hallucinated / factual samples whose answer contains the no-conflict phrase;\n",
    "# t33 / f33 collect the dataset references of hallucinated / factual samples whose answer lacks it.\n",
    "NO_CONFLICT=\"here are no conflicting parts\"\n",
    "t=0\n",
    "f=0\n",
    "t33=[]\n",
    "f33=[]\n",
    "n=0\n",
    "# Parse the dataset once up front rather than once per detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/wice+0.5/dataset_0.5.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        parts=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        # Three pieces means 'Reference: ' occurs twice; keep everything after the first occurrence.\n",
    "        if len(parts)==3:\n",
    "            ref1=parts[1]+\"Reference: \"+parts[2]\n",
    "        else:\n",
    "            ref1=parts[-1]\n",
    "        # Drop the 8 trailing characters (NOTE(review): presumably a fixed trailer after the reference text - confirm).\n",
    "        # A label containing 'true' marks a hallucinated sample.\n",
    "        dataset.append((ref1[:-8],'true' in label))\n",
    "with open(\"llama_data/wice+0.5/detections_3_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        cleared=NO_CONFLICT in answer\n",
    "        key=ref.strip()\n",
    "        key_sentence=key.split(\".\")[0].strip()\n",
    "        key_head=key.split(\"\\\\\\\\\")[0]\n",
    "        # Tail of the reference after the last backslash-u marker, skipping four more characters.\n",
    "        uni=key.split('\\\\u')[-1][4:]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for ref1,hallucinated in dataset:\n",
    "            cand=ref1.strip()\n",
    "            # An exact match always counts; the looser fallbacks are only tried until the first match.\n",
    "            hit=key==cand\n",
    "            if not hit and not flag:\n",
    "                # Fallbacks in order: same text before the first '.', same text before the first\n",
    "                # double backslash, or the last len(uni) characters of the dataset reference equal uni.\n",
    "                hit=(\n",
    "                    key_sentence==cand.split(\".\")[0].strip()\n",
    "                    or key_head==cand.split(\"\\\\\\\\\")[0]\n",
    "                    or cand[-len(uni):]==uni\n",
    "                )\n",
    "            if not hit:\n",
    "                continue\n",
    "            flag=True\n",
    "            n+=1\n",
    "            if cleared:\n",
    "                if hallucinated:\n",
    "                    t+=1\n",
    "                else:\n",
    "                    f+=1\n",
    "            elif hallucinated:\n",
    "                t33.append(ref1)\n",
    "            else:\n",
    "                f33.append(ref1)\n",
    "        # Every detection line should match exactly one dataset entry; show the detection\n",
    "        # reference (ref) of the ones that do not, since ref1 is only the last dataset entry here.\n",
    "        if n-pre!=1:\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 110,
   "id": "52573393",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "136 0.5596707818930041\n",
      "90 0.37037037037037035\n",
      "matched num= 486\n"
     ]
    }
   ],
   "source": [
    "# Score detections_4.jsonl against the labelled wice+0.5 dataset.\n",
    "# t / f count hallucinated / factual samples whose answer contains the no-conflict phrase;\n",
    "# t4 / f4 collect the dataset references of hallucinated / factual samples whose answer lacks it.\n",
    "NO_CONFLICT=\"here are no conflicting parts\"\n",
    "t=0\n",
    "f=0\n",
    "t4=[]\n",
    "f4=[]\n",
    "n=0\n",
    "# Parse the dataset once up front rather than once per detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/wice+0.5/dataset_0.5.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        parts=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        # Three pieces means 'Reference: ' occurs twice; keep everything after the first occurrence.\n",
    "        if len(parts)==3:\n",
    "            ref1=parts[1]+\"Reference: \"+parts[2]\n",
    "        else:\n",
    "            ref1=parts[-1]\n",
    "        # Drop the 8 trailing characters (NOTE(review): presumably a fixed trailer after the reference text - confirm).\n",
    "        # A label containing 'true' marks a hallucinated sample.\n",
    "        dataset.append((ref1[:-8],'true' in label))\n",
    "with open(\"llama_data/wice+0.5/detections_4.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        cleared=NO_CONFLICT in answer\n",
    "        key=ref.strip()\n",
    "        key_sentence=key.split(\".\")[0].strip()\n",
    "        key_head=key.split(\"\\\\\\\\\")[0]\n",
    "        # Tail of the reference after the last backslash-u marker, skipping four more characters.\n",
    "        uni=key.split('\\\\u')[-1][4:]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for ref1,hallucinated in dataset:\n",
    "            cand=ref1.strip()\n",
    "            # An exact match always counts; the looser fallbacks are only tried until the first match.\n",
    "            hit=key==cand\n",
    "            if not hit and not flag:\n",
    "                # Fallbacks in order: same text before the first '.', same text before the first\n",
    "                # double backslash, or the last len(uni) characters of the dataset reference equal uni.\n",
    "                hit=(\n",
    "                    key_sentence==cand.split(\".\")[0].strip()\n",
    "                    or key_head==cand.split(\"\\\\\\\\\")[0]\n",
    "                    or cand[-len(uni):]==uni\n",
    "                )\n",
    "            if not hit:\n",
    "                continue\n",
    "            flag=True\n",
    "            n+=1\n",
    "            if cleared:\n",
    "                if hallucinated:\n",
    "                    t+=1\n",
    "                else:\n",
    "                    f+=1\n",
    "            elif hallucinated:\n",
    "                t4.append(ref1)\n",
    "            else:\n",
    "                f4.append(ref1)\n",
    "        # Every detection line should match exactly one dataset entry; show the detection\n",
    "        # reference (ref) of the ones that do not, since ref1 is only the last dataset entry here.\n",
    "        if n-pre!=1:\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 111,
   "id": "240b72c3",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "140 0.5761316872427984\n",
      "103 0.42386831275720166\n",
      "matched num= 486\n"
     ]
    }
   ],
   "source": [
    "# Score the detections_4_1 run against the wice+0.5 dataset.\n",
    "# t / f count the hallucinated / factual references for which the answer contains the\n",
    "# no-conflict phrase; t44 / f44 collect the references whose answer does not contain it.\n",
    "t=0\n",
    "f=0\n",
    "t44=[]\n",
    "f44=[]\n",
    "n=0\n",
    "# Parse the dataset once instead of re-reading the file for every detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/wice+0.5/dataset_0.5.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # the reference text itself contains the marker; stitch it back together\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        dataset.append((label,ref1[:-8]))  # trim the 8 trailing characters\n",
    "with open(\"llama_data/wice+0.5/detections_4_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        key=ref.strip()\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for label,ref1 in dataset:\n",
    "            cand=ref1.strip()\n",
    "            hit=key==cand\n",
    "            if not hit and not flag:\n",
    "                # Fuzzy fallbacks, used only until this detection has its first match:\n",
    "                # equal text before the first period, equal text before the first double\n",
    "                # backslash, or the dataset text ends with what follows the last\n",
    "                # backslash-u escape (minus 4 characters) of the detection text.\n",
    "                uni=key.split('\\\\u')[-1][4:]\n",
    "                hit=(key.split(\".\")[0].strip()==cand.split(\".\")[0].strip()\n",
    "                     or key.split(\"\\\\\\\\\")[0]==cand.split(\"\\\\\\\\\")[0]\n",
    "                     or cand[-len(uni):]==uni)\n",
    "            if hit:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                if 'true' in label: # hallucination\n",
    "                    if no_conflict:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t44.append(ref1)\n",
    "                else: # factual\n",
    "                    if no_conflict:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f44.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # show the detection reference that matched zero or several dataset rows\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "id": "d0ddfb5d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "177 0.7283950617283951\n",
      "62 0.2551440329218107\n",
      "matched num= 486\n"
     ]
    }
   ],
   "source": [
    "# Score the detections_5 run against the wice+0.5 dataset.\n",
    "# t / f count the hallucinated / factual references for which the answer contains the\n",
    "# no-conflict phrase; t5 / f5 collect the references whose answer does not contain it.\n",
    "t=0\n",
    "f=0\n",
    "t5=[]\n",
    "f5=[]\n",
    "n=0\n",
    "# Parse the dataset once instead of re-reading the file for every detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/wice+0.5/dataset_0.5.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # the reference text itself contains the marker; stitch it back together\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        dataset.append((label,ref1[:-8]))  # trim the 8 trailing characters\n",
    "with open(\"llama_data/wice+0.5/detections_5.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        key=ref.strip()\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for label,ref1 in dataset:\n",
    "            cand=ref1.strip()\n",
    "            hit=key==cand\n",
    "            if not hit and not flag:\n",
    "                # Fuzzy fallbacks, used only until this detection has its first match:\n",
    "                # equal text before the first period, equal text before the first double\n",
    "                # backslash, or the dataset text ends with what follows the last\n",
    "                # backslash-u escape (minus 4 characters) of the detection text.\n",
    "                uni=key.split('\\\\u')[-1][4:]\n",
    "                hit=(key.split(\".\")[0].strip()==cand.split(\".\")[0].strip()\n",
    "                     or key.split(\"\\\\\\\\\")[0]==cand.split(\"\\\\\\\\\")[0]\n",
    "                     or cand[-len(uni):]==uni)\n",
    "            if hit:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                if 'true' in label: # hallucination\n",
    "                    if no_conflict:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t5.append(ref1)\n",
    "                else: # factual\n",
    "                    if no_conflict:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f5.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # show the detection reference that matched zero or several dataset rows\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 113,
   "id": "8af3a292",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "170 0.6995884773662552\n",
      "68 0.27983539094650206\n",
      "matched num= 486\n"
     ]
    }
   ],
   "source": [
    "# Score the detections_5_1 run against the wice+0.5 dataset.\n",
    "# t / f count the hallucinated / factual references for which the answer contains the\n",
    "# no-conflict phrase; t55 / f55 collect the references whose answer does not contain it.\n",
    "t=0\n",
    "f=0\n",
    "t55=[]\n",
    "f55=[]\n",
    "n=0\n",
    "# Parse the dataset once instead of re-reading the file for every detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/wice+0.5/dataset_0.5.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # the reference text itself contains the marker; stitch it back together\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        dataset.append((label,ref1[:-8]))  # trim the 8 trailing characters\n",
    "with open(\"llama_data/wice+0.5/detections_5_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        key=ref.strip()\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for label,ref1 in dataset:\n",
    "            cand=ref1.strip()\n",
    "            hit=key==cand\n",
    "            if not hit and not flag:\n",
    "                # Fuzzy fallbacks, used only until this detection has its first match:\n",
    "                # equal text before the first period, equal text before the first double\n",
    "                # backslash, or the dataset text ends with what follows the last\n",
    "                # backslash-u escape (minus 4 characters) of the detection text.\n",
    "                uni=key.split('\\\\u')[-1][4:]\n",
    "                hit=(key.split(\".\")[0].strip()==cand.split(\".\")[0].strip()\n",
    "                     or key.split(\"\\\\\\\\\")[0]==cand.split(\"\\\\\\\\\")[0]\n",
    "                     or cand[-len(uni):]==uni)\n",
    "            if hit:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                if 'true' in label: # hallucination\n",
    "                    if no_conflict:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t55.append(ref1)\n",
    "                else: # factual\n",
    "                    if no_conflict:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f55.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # show the detection reference that matched zero or several dataset rows\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 114,
   "id": "2bb2742c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "229 0.9423868312757202\n",
      "83 0.34156378600823045\n",
      "matched num= 486\n"
     ]
    }
   ],
   "source": [
    "# Score the detections_6_2 run against the wice+0.5 dataset.\n",
    "# t / f count the hallucinated / factual references for which the answer contains the\n",
    "# no-conflict phrase; t6 / f6 collect the references whose answer does not contain it.\n",
    "t=0\n",
    "f=0\n",
    "t6=[]\n",
    "f6=[]\n",
    "n=0\n",
    "# Parse the dataset once instead of re-reading the file for every detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/wice+0.5/dataset_0.5.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # the reference text itself contains the marker; stitch it back together\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        dataset.append((label,ref1[:-8]))  # trim the 8 trailing characters\n",
    "with open(\"llama_data/wice+0.5/detections_6_2.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        key=ref.strip()\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for label,ref1 in dataset:\n",
    "            cand=ref1.strip()\n",
    "            hit=key==cand\n",
    "            if not hit and not flag:\n",
    "                # Fuzzy fallbacks, used only until this detection has its first match:\n",
    "                # equal text before the first period, equal text before the first double\n",
    "                # backslash, or the dataset text ends with what follows the last\n",
    "                # backslash-u escape (minus 4 characters) of the detection text.\n",
    "                uni=key.split('\\\\u')[-1][4:]\n",
    "                hit=(key.split(\".\")[0].strip()==cand.split(\".\")[0].strip()\n",
    "                     or key.split(\"\\\\\\\\\")[0]==cand.split(\"\\\\\\\\\")[0]\n",
    "                     or cand[-len(uni):]==uni)\n",
    "            if hit:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                if 'true' in label: # hallucination\n",
    "                    if no_conflict:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t6.append(ref1)\n",
    "                else: # factual\n",
    "                    if no_conflict:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f6.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # show the detection reference that matched zero or several dataset rows\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "id": "da021996",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "147 0.6049382716049383\n",
      "70 0.2880658436213992\n",
      "matched num= 486\n"
     ]
    }
   ],
   "source": [
    "# Score the detections_6_1 run against the wice+0.5 dataset.\n",
    "# t / f count the hallucinated / factual references for which the answer contains the\n",
    "# no-conflict phrase; t66 / f66 collect the references whose answer does not contain it.\n",
    "t=0\n",
    "f=0\n",
    "t66=[]\n",
    "f66=[]\n",
    "n=0\n",
    "# Parse the dataset once instead of re-reading the file for every detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/wice+0.5/dataset_0.5.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # the reference text itself contains the marker; stitch it back together\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        dataset.append((label,ref1[:-8]))  # trim the 8 trailing characters\n",
    "with open(\"llama_data/wice+0.5/detections_6_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        key=ref.strip()\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for label,ref1 in dataset:\n",
    "            cand=ref1.strip()\n",
    "            hit=key==cand\n",
    "            if not hit and not flag:\n",
    "                # Fuzzy fallbacks, used only until this detection has its first match:\n",
    "                # equal text before the first period, equal text before the first double\n",
    "                # backslash, or the dataset text ends with what follows the last\n",
    "                # backslash-u escape (minus 4 characters) of the detection text.\n",
    "                uni=key.split('\\\\u')[-1][4:]\n",
    "                hit=(key.split(\".\")[0].strip()==cand.split(\".\")[0].strip()\n",
    "                     or key.split(\"\\\\\\\\\")[0]==cand.split(\"\\\\\\\\\")[0]\n",
    "                     or cand[-len(uni):]==uni)\n",
    "            if hit:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                if 'true' in label: # hallucination\n",
    "                    if no_conflict:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t66.append(ref1)\n",
    "                else: # factual\n",
    "                    if no_conflict:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f66.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # show the detection reference that matched zero or several dataset rows\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 117,
   "id": "1ae8ae81",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "170 0.6995884773662552\n",
      "65 0.2674897119341564\n",
      "matched num= 486\n"
     ]
    }
   ],
   "source": [
    "# Score the detections_7 run against the wice+0.5 dataset.\n",
    "# t / f count the hallucinated / factual references for which the answer contains the\n",
    "# no-conflict phrase; t7 / f7 collect the references whose answer does not contain it.\n",
    "t=0\n",
    "f=0\n",
    "t7=[]\n",
    "f7=[]\n",
    "n=0\n",
    "# Parse the dataset once instead of re-reading the file for every detection line.\n",
    "dataset=[]\n",
    "with open(\"llama_data/wice+0.5/dataset_0.5.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # the reference text itself contains the marker; stitch it back together\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        dataset.append((label,ref1[:-8]))  # trim the 8 trailing characters\n",
    "with open(\"llama_data/wice+0.5/detections_7.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        key=ref.strip()\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for label,ref1 in dataset:\n",
    "            cand=ref1.strip()\n",
    "            hit=key==cand\n",
    "            if not hit and not flag:\n",
    "                # Fuzzy fallbacks, used only until this detection has its first match:\n",
    "                # equal text before the first period, equal text before the first double\n",
    "                # backslash, or the dataset text ends with what follows the last\n",
    "                # backslash-u escape (minus 4 characters) of the detection text.\n",
    "                uni=key.split('\\\\u')[-1][4:]\n",
    "                hit=(key.split(\".\")[0].strip()==cand.split(\".\")[0].strip()\n",
    "                     or key.split(\"\\\\\\\\\")[0]==cand.split(\"\\\\\\\\\")[0]\n",
    "                     or cand[-len(uni):]==uni)\n",
    "            if hit:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                if 'true' in label: # hallucination\n",
    "                    if no_conflict:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t7.append(ref1)\n",
    "                else: # factual\n",
    "                    if no_conflict:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f7.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # show the detection reference that matched zero or several dataset rows\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "id": "d4a9247f",
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 1 2 12\n",
      "0.5\n",
      "0.6345864661654136\n",
      "0 1 5 12\n",
      "0.5267489711934157\n",
      "0.6577380952380952\n",
      "0 2 5 12\n",
      "0.5267489711934157\n",
      "0.655688622754491\n",
      "0 5 6 12\n",
      "0.5267489711934157\n",
      "0.6577380952380952\n",
      "0 5 7 12\n",
      "0.5308641975308642\n",
      "0.6576576576576576\n",
      "0 5 10 12\n",
      "0.5411522633744856\n",
      "0.6646616541353384\n",
      "3 4 5 12\n",
      "0.5411522633744856\n",
      "0.6636500754147813\n",
      "3 5 10 12\n",
      "0.5473251028806585\n",
      "0.6656534954407295\n"
     ]
    }
   ],
   "source": [
    "# Exhaustive search over every ensemble of three runs: the per-run lists are pooled and\n",
    "# de-duplicated, so a reference counts once if any of the three runs collected it.\n",
    "# Every ensemble whose TN+TP ties or beats the best value seen so far is printed as\n",
    "# its three list indices, followed by TN+TP and F1.\n",
    "tlist=[t1,t2,t3,t4,t5,t6,t7,t11,t22,t33,t44,t55,t66]\n",
    "flist=[f1,f2,f3,f4,f5,f6,f7,f11,f22,f33,f44,f55,f66]\n",
    "#      0   1  2  3  4  5  6  7   8   9   10  11  12\n",
    "runs=len(tlist)\n",
    "minV=0\n",
    "for kk in range(runs-2):\n",
    "    for jj in range(kk+1,runs-1):\n",
    "        for ii in range(jj+1,runs):\n",
    "            aa=len(set(tlist[kk]+tlist[jj]+tlist[ii]))  # distinct entries pooled from the t-lists\n",
    "            bb=len(set(flist[kk]+flist[jj]+flist[ii]))  # distinct entries pooled from the f-lists\n",
    "            # assumes num is the per-class sample count, so 2*num is the dataset size - TODO confirm\n",
    "            TN=(num-bb)/(num*2)\n",
    "            TP=aa/(num*2)\n",
    "            FN=(num-aa)/(num*2)\n",
    "            FP=bb/(num*2)\n",
    "            F1=2*TP/(2*TP+FP+FN)\n",
    "            if TN+TP>=minV:\n",
    "                # only the three loop indices are printed; the former fourth value came\n",
    "                # from a variable that is not defined in this cell\n",
    "                print(kk,jj,ii)\n",
    "                print(TN+TP)\n",
    "                print(F1)\n",
    "                minV=TN+TP"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 544,
   "id": "2f2b2a47",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "486\n",
      "8.069958847736626\n",
      "8.378600823045268\n"
     ]
    }
   ],
   "source": [
    "# For every dataset example, count how many of the runs in tlist / flist\n",
    "# flagged its reference, then print the number of examples and the mean\n",
    "# count for each class.\n",
    "with open('llama_data/wice+0.5/dataset_0.5.jsonl') as file1:\n",
    "    fnum=[]  # per-example flag counts for the non-'true' (factual) examples\n",
    "    tnum=[]  # per-example flag counts for the 'true' (hallucination) examples\n",
    "    knum=0   # total number of dataset lines read\n",
    "    for line1 in file1:\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # the text itself contains \"Reference: \" once more; stitch it back together\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        ref1=ref1[:-8]  # trim the 8 characters preceding \"> Assistant: \"\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        knum+=1\n",
    "        # Generator expressions keep the loop variable local, so the notebook-wide\n",
    "        # counters `t` and `f` are no longer overwritten with list objects.\n",
    "        if 'true' in label: # hallucination (same convention as the detection cells)\n",
    "            num2=sum(1 for run in tlist if ref1 in run)\n",
    "            tnum.append(num2)\n",
    "        else: # factual\n",
    "            num1=sum(1 for run in flist if ref1 in run)\n",
    "            fnum.append(num1)\n",
    "    print(knum)\n",
    "    print(np.mean(fnum))\n",
    "    print(np.mean(tnum))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 545,
   "id": "4d27e54b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "208\n",
      "0.8559670781893004\n"
     ]
    }
   ],
   "source": [
    "# Distinct references collected in t6: print their count and the count\n",
    "# as a fraction of num.\n",
    "res=set(t6)\n",
    "a=list(res)\n",
    "aa=len(a)\n",
    "print(aa)\n",
    "print(aa/num)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 546,
   "id": "ddb40453",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "140\n",
      "0.42386831275720166\n"
     ]
    }
   ],
   "source": [
    "# Distinct references collected in f6: print their count and the share of\n",
    "# num that is NOT in f6.\n",
    "res=set(f6)\n",
    "a=list(res)\n",
    "bb=len(a)\n",
    "print(bb)\n",
    "print((num-bb)/num)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 547,
   "id": "de3cad4f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "FN 0.0720164609053498\n",
      "FP 0.2880658436213992\n",
      "TN 0.21193415637860083\n",
      "TP 0.4279835390946502\n"
     ]
    }
   ],
   "source": [
    "# Confusion-matrix cells expressed as fractions of all examples (2*num).\n",
    "# aa / bb are the distinct flagged-reference counts computed in the two\n",
    "# cells above.\n",
    "all_num=2*num\n",
    "FN=(num-aa)/all_num  # false negatives\n",
    "print('FN',FN)\n",
    "FP=bb/all_num  # false positives\n",
    "print('FP',FP)\n",
    "TN=(num-bb)/all_num  # true negatives\n",
    "print('TN',TN)\n",
    "TP=aa/all_num  # true positives\n",
    "print('TP',TP)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 548,
   "id": "d6f499ae",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.6399176954732511\n",
      "0.5977011494252873\n",
      "0.8559670781893004\n",
      "0.7038917089678511\n"
     ]
    }
   ],
   "source": [
    "# Summary metrics derived from the confusion-matrix fractions above.\n",
    "Accuracy=TN+TP\n",
    "print(Accuracy)\n",
    "# Precision is undefined when nothing was flagged (TP+FP == 0); report 0.0\n",
    "# in that case instead of raising ZeroDivisionError.\n",
    "Precision=TP/(TP+FP) if (TP+FP) else 0.0\n",
    "print(Precision)\n",
    "Recall=TP/(TP+FN)\n",
    "print(Recall)\n",
    "F1=2*TP/(2*TP+FP+FN)\n",
    "print(F1)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "71bf4c7c",
   "metadata": {},
   "source": [
    "# wice+0.9"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 141,
   "id": "b847b333",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Denominator used by the cells below for per-class rates (e.g. (num-t)/num, f/num);\n",
    "# presumably the number of examples per label class, i.e. half of the dataset lines -- TODO confirm.\n",
    "num=261"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 142,
   "id": "21911b6c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "252 0.9655172413793104\n",
      "20 0.07662835249042145\n",
      "matched num= 522\n"
     ]
    }
   ],
   "source": [
    "# Score llama_data/wice+0.9/detections_1.jsonl against the labelled dataset.\n",
    "#   t / f   : hallucination / factual examples answered with \"here are no conflicting parts\"\n",
    "#   t1 / f1 : references of hallucination / factual examples that were flagged instead\n",
    "#   n       : number of (detection, dataset) matches\n",
    "t=0\n",
    "f=0\n",
    "t1=[]\n",
    "f1=[]\n",
    "n=0\n",
    "# Parse the dataset once up front instead of re-opening and re-splitting the\n",
    "# file for every detection line.\n",
    "dataset_rows=[]\n",
    "with open(\"llama_data/wice+0.9/dataset_0.9.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # the text itself contains \"Reference: \" once more; stitch it back together\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        ref1=ref1[:-8]  # trim the 8 characters preceding \"> Assistant: \"\n",
    "        dataset_rows.append((ref1,'true' in label))  # 'true' label == hallucination\n",
    "with open(\"llama_data/wice+0.9/detections_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        key=ref.strip()\n",
    "        # Text after the last backslash-u escape (minus its 4 hex digits), used by\n",
    "        # the suffix fallback. The backslash must be escaped: a bare backslash-u\n",
    "        # literal is a truncated unicode escape and does not compile in Python 3.\n",
    "        uni=key.split('\\\\u')[-1][4:]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for ref1,is_hallucination in dataset_rows:\n",
    "            cand=ref1.strip()\n",
    "            matched=key==cand\n",
    "            if not matched and not flag:\n",
    "                # Fuzzy fallbacks, tried only while nothing has matched yet:\n",
    "                # same first sentence, same text before the first double\n",
    "                # backslash, or same tail after the last unicode escape.\n",
    "                # NOTE(review): a fuzzy hit on an earlier row does not stop a later\n",
    "                # exact hit from also counting; the n-pre check below reports that.\n",
    "                matched=(\n",
    "                    key.split(\".\")[0].strip()==cand.split(\".\")[0].strip()\n",
    "                    or key.split(\"\\\\\\\\\")[0]==cand.split(\"\\\\\\\\\")[0]\n",
    "                    or cand[-len(uni):]==uni\n",
    "                )\n",
    "            if not matched:\n",
    "                continue\n",
    "            flag=True\n",
    "            n+=1\n",
    "            if is_hallucination:\n",
    "                if no_conflict:\n",
    "                    t+=1\n",
    "                else:\n",
    "                    t1.append(ref1)\n",
    "            else: # factual\n",
    "                if no_conflict:\n",
    "                    f+=1\n",
    "                else:\n",
    "                    f1.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # this detection matched zero or several dataset rows\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 143,
   "id": "3bb0299a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "250 0.9578544061302682\n",
      "9 0.034482758620689655\n",
      "matched num= 522\n"
     ]
    }
   ],
   "source": [
    "# Score llama_data/wice+0.9/detections_1_1.jsonl against the labelled dataset.\n",
    "#   t / f     : hallucination / factual examples answered with \"here are no conflicting parts\"\n",
    "#   t11 / f11 : references of hallucination / factual examples that were flagged instead\n",
    "#   n         : number of (detection, dataset) matches\n",
    "t=0\n",
    "f=0\n",
    "t11=[]\n",
    "f11=[]\n",
    "n=0\n",
    "# Parse the dataset once up front instead of re-opening and re-splitting the\n",
    "# file for every detection line.\n",
    "dataset_rows=[]\n",
    "with open(\"llama_data/wice+0.9/dataset_0.9.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # the text itself contains \"Reference: \" once more; stitch it back together\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        ref1=ref1[:-8]  # trim the 8 characters preceding \"> Assistant: \"\n",
    "        dataset_rows.append((ref1,'true' in label))  # 'true' label == hallucination\n",
    "with open(\"llama_data/wice+0.9/detections_1_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        key=ref.strip()\n",
    "        # Text after the last backslash-u escape (minus its 4 hex digits), used by\n",
    "        # the suffix fallback. The backslash must be escaped: a bare backslash-u\n",
    "        # literal is a truncated unicode escape and does not compile in Python 3.\n",
    "        uni=key.split('\\\\u')[-1][4:]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for ref1,is_hallucination in dataset_rows:\n",
    "            cand=ref1.strip()\n",
    "            matched=key==cand\n",
    "            if not matched and not flag:\n",
    "                # Fuzzy fallbacks, tried only while nothing has matched yet:\n",
    "                # same first sentence, same text before the first double\n",
    "                # backslash, or same tail after the last unicode escape.\n",
    "                # NOTE(review): a fuzzy hit on an earlier row does not stop a later\n",
    "                # exact hit from also counting; the n-pre check below reports that.\n",
    "                matched=(\n",
    "                    key.split(\".\")[0].strip()==cand.split(\".\")[0].strip()\n",
    "                    or key.split(\"\\\\\\\\\")[0]==cand.split(\"\\\\\\\\\")[0]\n",
    "                    or cand[-len(uni):]==uni\n",
    "                )\n",
    "            if not matched:\n",
    "                continue\n",
    "            flag=True\n",
    "            n+=1\n",
    "            if is_hallucination:\n",
    "                if no_conflict:\n",
    "                    t+=1\n",
    "                else:\n",
    "                    t11.append(ref1)\n",
    "            else: # factual\n",
    "                if no_conflict:\n",
    "                    f+=1\n",
    "                else:\n",
    "                    f11.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # This detection matched zero or several dataset rows. Print the\n",
    "            # detection's own reference, not the last dataset row visited.\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 144,
   "id": "558dc89e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "239 0.9157088122605364\n",
      "28 0.10727969348659004\n",
      "matched num= 522\n"
     ]
    }
   ],
   "source": [
    "# Score llama_data/wice+0.9/detections_2.jsonl against the labelled dataset.\n",
    "#   t / f   : hallucination / factual examples answered with \"here are no conflicting parts\"\n",
    "#   t2 / f2 : references of hallucination / factual examples that were flagged instead\n",
    "#   n       : number of (detection, dataset) matches\n",
    "t=0\n",
    "f=0\n",
    "t2=[]\n",
    "f2=[]\n",
    "n=0\n",
    "# Parse the dataset once up front instead of re-opening and re-splitting the\n",
    "# file for every detection line.\n",
    "dataset_rows=[]\n",
    "with open(\"llama_data/wice+0.9/dataset_0.9.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # the text itself contains \"Reference: \" once more; stitch it back together\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        ref1=ref1[:-8]  # trim the 8 characters preceding \"> Assistant: \"\n",
    "        dataset_rows.append((ref1,'true' in label))  # 'true' label == hallucination\n",
    "with open(\"llama_data/wice+0.9/detections_2.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        key=ref.strip()\n",
    "        # Text after the last backslash-u escape (minus its 4 hex digits), used by\n",
    "        # the suffix fallback. The backslash must be escaped: a bare backslash-u\n",
    "        # literal is a truncated unicode escape and does not compile in Python 3.\n",
    "        uni=key.split('\\\\u')[-1][4:]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for ref1,is_hallucination in dataset_rows:\n",
    "            cand=ref1.strip()\n",
    "            matched=key==cand\n",
    "            if not matched and not flag:\n",
    "                # Fuzzy fallbacks, tried only while nothing has matched yet:\n",
    "                # same first sentence, same text before the first double\n",
    "                # backslash, or same tail after the last unicode escape.\n",
    "                # NOTE(review): a fuzzy hit on an earlier row does not stop a later\n",
    "                # exact hit from also counting; the n-pre check below reports that.\n",
    "                matched=(\n",
    "                    key.split(\".\")[0].strip()==cand.split(\".\")[0].strip()\n",
    "                    or key.split(\"\\\\\\\\\")[0]==cand.split(\"\\\\\\\\\")[0]\n",
    "                    or cand[-len(uni):]==uni\n",
    "                )\n",
    "            if not matched:\n",
    "                continue\n",
    "            flag=True\n",
    "            n+=1\n",
    "            if is_hallucination:\n",
    "                if no_conflict:\n",
    "                    t+=1\n",
    "                else:\n",
    "                    t2.append(ref1)\n",
    "            else: # factual\n",
    "                if no_conflict:\n",
    "                    f+=1\n",
    "                else:\n",
    "                    f2.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # This detection matched zero or several dataset rows. Print the\n",
    "            # detection's own reference, not the last dataset row visited.\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 145,
   "id": "2f51bff8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "235 0.9003831417624522\n",
      "36 0.13793103448275862\n",
      "matched num= 522\n"
     ]
    }
   ],
   "source": [
    "# Score llama_data/wice+0.9/detections_2_1.jsonl against the labelled dataset.\n",
    "#   t / f     : hallucination / factual examples answered with \"here are no conflicting parts\"\n",
    "#   t22 / f22 : references of hallucination / factual examples that were flagged instead\n",
    "#   n         : number of (detection, dataset) matches\n",
    "t=0\n",
    "f=0\n",
    "t22=[]\n",
    "f22=[]\n",
    "n=0\n",
    "# Parse the dataset once up front instead of re-opening and re-splitting the\n",
    "# file for every detection line.\n",
    "dataset_rows=[]\n",
    "with open(\"llama_data/wice+0.9/dataset_0.9.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # the text itself contains \"Reference: \" once more; stitch it back together\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        ref1=ref1[:-8]  # trim the 8 characters preceding \"> Assistant: \"\n",
    "        dataset_rows.append((ref1,'true' in label))  # 'true' label == hallucination\n",
    "with open(\"llama_data/wice+0.9/detections_2_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        key=ref.strip()\n",
    "        # Text after the last backslash-u escape (minus its 4 hex digits), used by\n",
    "        # the suffix fallback. The backslash must be escaped: a bare backslash-u\n",
    "        # literal is a truncated unicode escape and does not compile in Python 3.\n",
    "        uni=key.split('\\\\u')[-1][4:]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for ref1,is_hallucination in dataset_rows:\n",
    "            cand=ref1.strip()\n",
    "            matched=key==cand\n",
    "            if not matched and not flag:\n",
    "                # Fuzzy fallbacks, tried only while nothing has matched yet:\n",
    "                # same first sentence, same text before the first double\n",
    "                # backslash, or same tail after the last unicode escape.\n",
    "                # NOTE(review): a fuzzy hit on an earlier row does not stop a later\n",
    "                # exact hit from also counting; the n-pre check below reports that.\n",
    "                matched=(\n",
    "                    key.split(\".\")[0].strip()==cand.split(\".\")[0].strip()\n",
    "                    or key.split(\"\\\\\\\\\")[0]==cand.split(\"\\\\\\\\\")[0]\n",
    "                    or cand[-len(uni):]==uni\n",
    "                )\n",
    "            if not matched:\n",
    "                continue\n",
    "            flag=True\n",
    "            n+=1\n",
    "            if is_hallucination:\n",
    "                if no_conflict:\n",
    "                    t+=1\n",
    "                else:\n",
    "                    t22.append(ref1)\n",
    "            else: # factual\n",
    "                if no_conflict:\n",
    "                    f+=1\n",
    "                else:\n",
    "                    f22.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # This detection matched zero or several dataset rows. Print the\n",
    "            # detection's own reference, not the last dataset row visited.\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 146,
   "id": "7e04add1",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "254 0.9731800766283525\n",
      "7 0.02681992337164751\n",
      "matched num= 522\n"
     ]
    }
   ],
   "source": [
    "# Score llama_data/wice+0.9/detections_3.jsonl against the labelled dataset.\n",
    "#   t / f   : hallucination / factual examples answered with \"here are no conflicting parts\"\n",
    "#   t3 / f3 : references of hallucination / factual examples that were flagged instead\n",
    "#   n       : number of (detection, dataset) matches\n",
    "t=0\n",
    "f=0\n",
    "t3=[]\n",
    "f3=[]\n",
    "n=0\n",
    "# Parse the dataset once up front instead of re-opening and re-splitting the\n",
    "# file for every detection line.\n",
    "dataset_rows=[]\n",
    "with open(\"llama_data/wice+0.9/dataset_0.9.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # the text itself contains \"Reference: \" once more; stitch it back together\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        ref1=ref1[:-8]  # trim the 8 characters preceding \"> Assistant: \"\n",
    "        dataset_rows.append((ref1,'true' in label))  # 'true' label == hallucination\n",
    "with open(\"llama_data/wice+0.9/detections_3.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        key=ref.strip()\n",
    "        # Text after the last backslash-u escape (minus its 4 hex digits), used by\n",
    "        # the suffix fallback. The backslash must be escaped: a bare backslash-u\n",
    "        # literal is a truncated unicode escape and does not compile in Python 3.\n",
    "        uni=key.split('\\\\u')[-1][4:]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for ref1,is_hallucination in dataset_rows:\n",
    "            cand=ref1.strip()\n",
    "            matched=key==cand\n",
    "            if not matched and not flag:\n",
    "                # Fuzzy fallbacks, tried only while nothing has matched yet:\n",
    "                # same first sentence, same text before the first double\n",
    "                # backslash, or same tail after the last unicode escape.\n",
    "                # NOTE(review): a fuzzy hit on an earlier row does not stop a later\n",
    "                # exact hit from also counting; the n-pre check below reports that.\n",
    "                matched=(\n",
    "                    key.split(\".\")[0].strip()==cand.split(\".\")[0].strip()\n",
    "                    or key.split(\"\\\\\\\\\")[0]==cand.split(\"\\\\\\\\\")[0]\n",
    "                    or cand[-len(uni):]==uni\n",
    "                )\n",
    "            if not matched:\n",
    "                continue\n",
    "            flag=True\n",
    "            n+=1\n",
    "            if is_hallucination:\n",
    "                if no_conflict:\n",
    "                    t+=1\n",
    "                else:\n",
    "                    t3.append(ref1)\n",
    "            else: # factual\n",
    "                if no_conflict:\n",
    "                    f+=1\n",
    "                else:\n",
    "                    f3.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # This detection matched zero or several dataset rows. Print the\n",
    "            # detection's own reference, not the last dataset row visited.\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 147,
   "id": "16ef9ae9",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "246 0.9425287356321839\n",
      "22 0.0842911877394636\n",
      "matched num= 522\n"
     ]
    }
   ],
   "source": [
    "# Score llama_data/wice+0.9/detections_3_1.jsonl against the labelled dataset.\n",
    "#   t / f     : hallucination / factual examples answered with \"here are no conflicting parts\"\n",
    "#   t33 / f33 : references of hallucination / factual examples that were flagged instead\n",
    "#   n         : number of (detection, dataset) matches\n",
    "t=0\n",
    "f=0\n",
    "t33=[]\n",
    "f33=[]\n",
    "n=0\n",
    "# Parse the dataset once up front instead of re-opening and re-splitting the\n",
    "# file for every detection line.\n",
    "dataset_rows=[]\n",
    "with open(\"llama_data/wice+0.9/dataset_0.9.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # the text itself contains \"Reference: \" once more; stitch it back together\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        ref1=ref1[:-8]  # trim the 8 characters preceding \"> Assistant: \"\n",
    "        dataset_rows.append((ref1,'true' in label))  # 'true' label == hallucination\n",
    "with open(\"llama_data/wice+0.9/detections_3_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        no_conflict=\"here are no conflicting parts\" in answer\n",
    "        key=ref.strip()\n",
    "        # Text after the last backslash-u escape (minus its 4 hex digits), used by\n",
    "        # the suffix fallback. The backslash must be escaped: a bare backslash-u\n",
    "        # literal is a truncated unicode escape and does not compile in Python 3.\n",
    "        uni=key.split('\\\\u')[-1][4:]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for ref1,is_hallucination in dataset_rows:\n",
    "            cand=ref1.strip()\n",
    "            matched=key==cand\n",
    "            if not matched and not flag:\n",
    "                # Fuzzy fallbacks, tried only while nothing has matched yet:\n",
    "                # same first sentence, same text before the first double\n",
    "                # backslash, or same tail after the last unicode escape.\n",
    "                # NOTE(review): a fuzzy hit on an earlier row does not stop a later\n",
    "                # exact hit from also counting; the n-pre check below reports that.\n",
    "                matched=(\n",
    "                    key.split(\".\")[0].strip()==cand.split(\".\")[0].strip()\n",
    "                    or key.split(\"\\\\\\\\\")[0]==cand.split(\"\\\\\\\\\")[0]\n",
    "                    or cand[-len(uni):]==uni\n",
    "                )\n",
    "            if not matched:\n",
    "                continue\n",
    "            flag=True\n",
    "            n+=1\n",
    "            if is_hallucination:\n",
    "                if no_conflict:\n",
    "                    t+=1\n",
    "                else:\n",
    "                    t33.append(ref1)\n",
    "            else: # factual\n",
    "                if no_conflict:\n",
    "                    f+=1\n",
    "                else:\n",
    "                    f33.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # This detection matched zero or several dataset rows. Print the\n",
    "            # detection's own reference, not the last dataset row visited.\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 148,
   "id": "c76d8ae8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "255 0.9770114942528736\n",
      "14 0.05363984674329502\n",
      "matched num= 522\n"
     ]
    }
   ],
   "source": [
    "# Score the detector answers in detections_4.jsonl against the labelled wice+0.9 dataset.\n",
    "#   t / f   : hallucinated / factual rows whose answer reports no conflicting parts\n",
    "#   t4 / f4 : references of hallucinated / factual rows whose answer reports a conflict\n",
    "#   n       : number of (detection, dataset row) matches\n",
    "# NOTE(review): `num` is not assigned in this cell; an earlier cell has to set it for\n",
    "# this dataset - confirm before trusting the printed ratios.\n",
    "t=0\n",
    "f=0\n",
    "t4=[]\n",
    "f4=[]\n",
    "n=0\n",
    "# Parse the labelled dataset once; it is identical for every detection line.\n",
    "dataset_rows=[]\n",
    "with open(\"llama_data/wice+0.9/dataset_0.9.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # the marker occurs twice: keep the second occurrence as part of the reference\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        ref1=ref1[:-8]  # drop the 8 characters that precede the assistant marker\n",
    "        dataset_rows.append((label,ref1))\n",
    "with open(\"llama_data/wice+0.9/detections_4.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for label,ref1 in dataset_rows:\n",
    "            # An exact match always counts, so duplicated rows show up in the check below.\n",
    "            hit=ref.strip()==ref1.strip()\n",
    "            if not hit and not flag:\n",
    "                # Fallbacks, tried only while nothing has matched yet: same text before the\n",
    "                # first period, same text before the first double backslash, or same tail as\n",
    "                # what follows the last unicode escape of the detection reference\n",
    "                # (skipping 4 characters, presumably its hex digits).\n",
    "                uni=ref.strip().split('\\\\u')[-1][4:]\n",
    "                hit=(ref.strip().split(\".\")[0].strip()==ref1.strip().split(\".\")[0].strip()\n",
    "                     or ref.strip().split(\"\\\\\\\\\")[0]==ref1.strip().split(\"\\\\\\\\\")[0]\n",
    "                     or ref1.strip()[-len(uni):]==uni)\n",
    "            if hit:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                no_conflict=\"here are no conflicting parts\" in answer\n",
    "                if 'true' in label: # hallucination\n",
    "                    if no_conflict:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t4.append(ref1)\n",
    "                else: # factual\n",
    "                    if no_conflict:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f4.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # Report the detection that matched zero or several rows; the loop variable\n",
    "            # ref1 would only ever show the last dataset row here.\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 149,
   "id": "eff70de0",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "249 0.9540229885057471\n",
      "28 0.10727969348659004\n",
      "matched num= 522\n"
     ]
    }
   ],
   "source": [
    "# Score the detector answers in detections_4_1.jsonl against the labelled wice+0.9 dataset.\n",
    "#   t / f     : hallucinated / factual rows whose answer reports no conflicting parts\n",
    "#   t44 / f44 : references of hallucinated / factual rows whose answer reports a conflict\n",
    "#   n         : number of (detection, dataset row) matches\n",
    "# NOTE(review): `num` is not assigned in this cell; an earlier cell has to set it for\n",
    "# this dataset - confirm before trusting the printed ratios.\n",
    "t=0\n",
    "f=0\n",
    "t44=[]\n",
    "f44=[]\n",
    "n=0\n",
    "# Parse the labelled dataset once; it is identical for every detection line.\n",
    "dataset_rows=[]\n",
    "with open(\"llama_data/wice+0.9/dataset_0.9.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # the marker occurs twice: keep the second occurrence as part of the reference\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        ref1=ref1[:-8]  # drop the 8 characters that precede the assistant marker\n",
    "        dataset_rows.append((label,ref1))\n",
    "with open(\"llama_data/wice+0.9/detections_4_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for label,ref1 in dataset_rows:\n",
    "            # An exact match always counts, so duplicated rows show up in the check below.\n",
    "            hit=ref.strip()==ref1.strip()\n",
    "            if not hit and not flag:\n",
    "                # Fallbacks, tried only while nothing has matched yet: same text before the\n",
    "                # first period, same text before the first double backslash, or same tail as\n",
    "                # what follows the last unicode escape of the detection reference\n",
    "                # (skipping 4 characters, presumably its hex digits).\n",
    "                uni=ref.strip().split('\\\\u')[-1][4:]\n",
    "                hit=(ref.strip().split(\".\")[0].strip()==ref1.strip().split(\".\")[0].strip()\n",
    "                     or ref.strip().split(\"\\\\\\\\\")[0]==ref1.strip().split(\"\\\\\\\\\")[0]\n",
    "                     or ref1.strip()[-len(uni):]==uni)\n",
    "            if hit:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                no_conflict=\"here are no conflicting parts\" in answer\n",
    "                if 'true' in label: # hallucination\n",
    "                    if no_conflict:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t44.append(ref1)\n",
    "                else: # factual\n",
    "                    if no_conflict:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f44.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # Report the detection that matched zero or several rows; the loop variable\n",
    "            # ref1 would only ever show the last dataset row here.\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 150,
   "id": "7c10b74a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "250 0.9578544061302682\n",
      "24 0.09195402298850575\n",
      "matched num= 522\n"
     ]
    }
   ],
   "source": [
    "# Score the detector answers in detections_5.jsonl against the labelled wice+0.9 dataset.\n",
    "#   t / f   : hallucinated / factual rows whose answer reports no conflicting parts\n",
    "#   t5 / f5 : references of hallucinated / factual rows whose answer reports a conflict\n",
    "#   n       : number of (detection, dataset row) matches\n",
    "# NOTE(review): `num` is not assigned in this cell; an earlier cell has to set it for\n",
    "# this dataset - confirm before trusting the printed ratios.\n",
    "t=0\n",
    "f=0\n",
    "t5=[]\n",
    "f5=[]\n",
    "n=0\n",
    "# Parse the labelled dataset once; it is identical for every detection line.\n",
    "dataset_rows=[]\n",
    "with open(\"llama_data/wice+0.9/dataset_0.9.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # the marker occurs twice: keep the second occurrence as part of the reference\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        ref1=ref1[:-8]  # drop the 8 characters that precede the assistant marker\n",
    "        dataset_rows.append((label,ref1))\n",
    "with open(\"llama_data/wice+0.9/detections_5.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for label,ref1 in dataset_rows:\n",
    "            # An exact match always counts, so duplicated rows show up in the check below.\n",
    "            hit=ref.strip()==ref1.strip()\n",
    "            if not hit and not flag:\n",
    "                # Fallbacks, tried only while nothing has matched yet: same text before the\n",
    "                # first period, same text before the first double backslash, or same tail as\n",
    "                # what follows the last unicode escape of the detection reference\n",
    "                # (skipping 4 characters, presumably its hex digits).\n",
    "                uni=ref.strip().split('\\\\u')[-1][4:]\n",
    "                hit=(ref.strip().split(\".\")[0].strip()==ref1.strip().split(\".\")[0].strip()\n",
    "                     or ref.strip().split(\"\\\\\\\\\")[0]==ref1.strip().split(\"\\\\\\\\\")[0]\n",
    "                     or ref1.strip()[-len(uni):]==uni)\n",
    "            if hit:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                no_conflict=\"here are no conflicting parts\" in answer\n",
    "                if 'true' in label: # hallucination\n",
    "                    if no_conflict:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t5.append(ref1)\n",
    "                else: # factual\n",
    "                    if no_conflict:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f5.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # Report the detection that matched zero or several rows; the loop variable\n",
    "            # ref1 would only ever show the last dataset row here.\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 151,
   "id": "79232798",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "250 0.9578544061302682\n",
      "20 0.07662835249042145\n",
      "matched num= 522\n"
     ]
    }
   ],
   "source": [
    "# Score the detector answers in detections_5_1.jsonl against the labelled wice+0.9 dataset.\n",
    "#   t / f     : hallucinated / factual rows whose answer reports no conflicting parts\n",
    "#   t55 / f55 : references of hallucinated / factual rows whose answer reports a conflict\n",
    "#   n         : number of (detection, dataset row) matches\n",
    "# NOTE(review): `num` is not assigned in this cell; an earlier cell has to set it for\n",
    "# this dataset - confirm before trusting the printed ratios.\n",
    "t=0\n",
    "f=0\n",
    "t55=[]\n",
    "f55=[]\n",
    "n=0\n",
    "# Parse the labelled dataset once; it is identical for every detection line.\n",
    "dataset_rows=[]\n",
    "with open(\"llama_data/wice+0.9/dataset_0.9.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # the marker occurs twice: keep the second occurrence as part of the reference\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        ref1=ref1[:-8]  # drop the 8 characters that precede the assistant marker\n",
    "        dataset_rows.append((label,ref1))\n",
    "with open(\"llama_data/wice+0.9/detections_5_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for label,ref1 in dataset_rows:\n",
    "            # An exact match always counts, so duplicated rows show up in the check below.\n",
    "            hit=ref.strip()==ref1.strip()\n",
    "            if not hit and not flag:\n",
    "                # Fallbacks, tried only while nothing has matched yet: same text before the\n",
    "                # first period, same text before the first double backslash, or same tail as\n",
    "                # what follows the last unicode escape of the detection reference\n",
    "                # (skipping 4 characters, presumably its hex digits).\n",
    "                uni=ref.strip().split('\\\\u')[-1][4:]\n",
    "                hit=(ref.strip().split(\".\")[0].strip()==ref1.strip().split(\".\")[0].strip()\n",
    "                     or ref.strip().split(\"\\\\\\\\\")[0]==ref1.strip().split(\"\\\\\\\\\")[0]\n",
    "                     or ref1.strip()[-len(uni):]==uni)\n",
    "            if hit:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                no_conflict=\"here are no conflicting parts\" in answer\n",
    "                if 'true' in label: # hallucination\n",
    "                    if no_conflict:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t55.append(ref1)\n",
    "                else: # factual\n",
    "                    if no_conflict:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f55.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # Report the detection that matched zero or several rows; the loop variable\n",
    "            # ref1 would only ever show the last dataset row here.\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 158,
   "id": "e94b9c68",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "232 0.8888888888888888\n",
      "103 0.3946360153256705\n",
      "matched num= 522\n"
     ]
    }
   ],
   "source": [
    "# Score the detector answers in detections_6_2_1.jsonl against the labelled wice+0.9 dataset.\n",
    "#   t / f   : hallucinated / factual rows whose answer reports no conflicting parts\n",
    "#   t6 / f6 : references of hallucinated / factual rows whose answer reports a conflict\n",
    "#   n       : number of (detection, dataset row) matches\n",
    "# NOTE(review): `num` is not assigned in this cell; an earlier cell has to set it for\n",
    "# this dataset - confirm before trusting the printed ratios.\n",
    "t=0\n",
    "f=0\n",
    "t6=[]\n",
    "f6=[]\n",
    "n=0\n",
    "# Parse the labelled dataset once; it is identical for every detection line.\n",
    "dataset_rows=[]\n",
    "with open(\"llama_data/wice+0.9/dataset_0.9.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # the marker occurs twice: keep the second occurrence as part of the reference\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        ref1=ref1[:-8]  # drop the 8 characters that precede the assistant marker\n",
    "        dataset_rows.append((label,ref1))\n",
    "with open(\"llama_data/wice+0.9/detections_6_2_1.jsonl\") as file:#6_2_1\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for label,ref1 in dataset_rows:\n",
    "            # An exact match always counts, so duplicated rows show up in the check below.\n",
    "            hit=ref.strip()==ref1.strip()\n",
    "            if not hit and not flag:\n",
    "                # Fallbacks, tried only while nothing has matched yet: same text before the\n",
    "                # first period, same text before the first double backslash, or same tail as\n",
    "                # what follows the last unicode escape of the detection reference\n",
    "                # (skipping 4 characters, presumably its hex digits).\n",
    "                uni=ref.strip().split('\\\\u')[-1][4:]\n",
    "                hit=(ref.strip().split(\".\")[0].strip()==ref1.strip().split(\".\")[0].strip()\n",
    "                     or ref.strip().split(\"\\\\\\\\\")[0]==ref1.strip().split(\"\\\\\\\\\")[0]\n",
    "                     or ref1.strip()[-len(uni):]==uni)\n",
    "            if hit:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                no_conflict=\"here are no conflicting parts\" in answer\n",
    "                if 'true' in label: # hallucination\n",
    "                    if no_conflict:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t6.append(ref1)\n",
    "                else: # factual\n",
    "                    if no_conflict:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f6.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # Report the detection that matched zero or several rows; the loop variable\n",
    "            # ref1 would only ever show the last dataset row here.\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 153,
   "id": "22b4b023",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "142 0.5440613026819924\n",
      "87 0.3333333333333333\n",
      "matched num= 522\n"
     ]
    }
   ],
   "source": [
    "# Score the detector answers in detections_6_1.jsonl against the labelled wice+0.9 dataset.\n",
    "#   t / f     : hallucinated / factual rows whose answer reports no conflicting parts\n",
    "#   t66 / f66 : references of hallucinated / factual rows whose answer reports a conflict\n",
    "#   n         : number of (detection, dataset row) matches\n",
    "# NOTE(review): `num` is not assigned in this cell; an earlier cell has to set it for\n",
    "# this dataset - confirm before trusting the printed ratios.\n",
    "t=0\n",
    "f=0\n",
    "t66=[]\n",
    "f66=[]\n",
    "n=0\n",
    "# Parse the labelled dataset once; it is identical for every detection line.\n",
    "dataset_rows=[]\n",
    "with open(\"llama_data/wice+0.9/dataset_0.9.jsonl\") as file1:\n",
    "    for line1 in file1:\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            # the marker occurs twice: keep the second occurrence as part of the reference\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        ref1=ref1[:-8]  # drop the 8 characters that precede the assistant marker\n",
    "        dataset_rows.append((label,ref1))\n",
    "with open(\"llama_data/wice+0.9/detections_6_1.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        for label,ref1 in dataset_rows:\n",
    "            # An exact match always counts, so duplicated rows show up in the check below.\n",
    "            hit=ref.strip()==ref1.strip()\n",
    "            if not hit and not flag:\n",
    "                # Fallbacks, tried only while nothing has matched yet: same text before the\n",
    "                # first period, same text before the first double backslash, or same tail as\n",
    "                # what follows the last unicode escape of the detection reference\n",
    "                # (skipping 4 characters, presumably its hex digits).\n",
    "                uni=ref.strip().split('\\\\u')[-1][4:]\n",
    "                hit=(ref.strip().split(\".\")[0].strip()==ref1.strip().split(\".\")[0].strip()\n",
    "                     or ref.strip().split(\"\\\\\\\\\")[0]==ref1.strip().split(\"\\\\\\\\\")[0]\n",
    "                     or ref1.strip()[-len(uni):]==uni)\n",
    "            if hit:\n",
    "                flag=True\n",
    "                n+=1\n",
    "                no_conflict=\"here are no conflicting parts\" in answer\n",
    "                if 'true' in label: # hallucination\n",
    "                    if no_conflict:\n",
    "                        t+=1\n",
    "                    else:\n",
    "                        t66.append(ref1)\n",
    "                else: # factual\n",
    "                    if no_conflict:\n",
    "                        f+=1\n",
    "                    else:\n",
    "                        f66.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            # Report the detection that matched zero or several dataset rows.\n",
    "            print(ref)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 154,
   "id": "9ba91a9b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "250 0.9578544061302682\n",
      "18 0.06896551724137931\n",
      "matched num= 522\n"
     ]
    }
   ],
   "source": [
    "t=0\n",
    "f=0\n",
    "t7=[]\n",
    "f7=[]\n",
    "n=0\n",
    "with open(\"llama_data/wice+0.9/detections_7.jsonl\") as file:\n",
    "    for line in file:\n",
    "        answer=line.split(\"\\\"\\\"> Assistant: \")[1]\n",
    "        ref=line.split(\"P2: \")[0].split(\"P1: \")[1][0:-3]\n",
    "        pre=n\n",
    "        flag=False\n",
    "        with open(\"llama_data/wice+0.9/dataset_0.9.jsonl\") as file1:\n",
    "            for line1 in file1:\n",
    "                label=line1.split(\"label: \")[-1]\n",
    "                ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "                if len(ref1)==3:\n",
    "                    ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "                else:\n",
    "                    ref1=ref1[-1]\n",
    "                ref1=ref1[:-8]\n",
    "                if ref.strip()==ref1.strip():\n",
    "                    flag=True\n",
    "                    n+=1\n",
    "                    if 'true' in label: # hallucination\n",
    "                        if \"here are no conflicting parts\" in answer: \n",
    "                            t+=1\n",
    "                        else:    \n",
    "                            t7.append(ref1)\n",
    "                    else: # factual\n",
    "                        if \"here are no conflicting parts\" in answer: \n",
    "                            f+=1\n",
    "                        else:    \n",
    "                            f7.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\".\")[0]\n",
    "                    c2=ref1.strip().split(\".\")[0]\n",
    "                    if c1.strip()==c2.strip():\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t7.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f7.append(ref1)\n",
    "                if flag==False:\n",
    "                    # \n",
    "                    c1=ref.strip().split(\"\\\\\\\\\")[0]\n",
    "                    c2=ref1.strip().split(\"\\\\\\\\\")[0]\n",
    "                    if c1==c2:\n",
    "                        flag=True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t7.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f7.append(ref1)\n",
    "                if flag==False:\n",
    "                    uni=ref.strip().split('\\\\u')[-1][4:]\n",
    "                    length=len(uni)\n",
    "                    if ref1.strip()[-length:] == uni:\n",
    "                        flag = True\n",
    "                        n+=1\n",
    "                        if 'true' in label: # hallucination\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                t+=1\n",
    "                            else:    \n",
    "                                t7.append(ref1)\n",
    "                        else: # factual\n",
    "                            if \"here are no conflicting parts\" in answer: \n",
    "                                f+=1\n",
    "                            else:    \n",
    "                                f7.append(ref1)\n",
    "        if n-pre!=1:\n",
    "            print(ref1)\n",
    "            print()\n",
    "print(num-t,(num-t)/num)\n",
    "print(f,f/num)\n",
    "print('matched num=',n)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 160,
   "id": "fca2e152",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 1\n",
      "0.5019157088122606\n",
      "0.6368715083798882\n",
      "0 5\n",
      "0.5172413793103449\n",
      "0.646067415730337\n",
      "1 5\n",
      "0.5172413793103449\n",
      "0.6440677966101694\n",
      "4 5\n",
      "0.5172413793103449\n",
      "0.6470588235294118\n",
      "5 8\n",
      "0.5383141762452107\n",
      "0.6562054208273894\n",
      "5 12\n",
      "0.5689655172413793\n",
      "0.6696035242290749\n"
     ]
    }
   ],
   "source": [
    "tlist=[t1,t2,t3,t4,t5,t6,t7,t11,t22,t33,t44,t55,t66]\n",
    "flist=[f1,f2,f3,f4,f5,f6,f7,f11,f22,f33,f44,f55,f66]\n",
    "#      0   1  2  3  4  5  6  7   8   9   10  11  12\n",
    "minV=0\n",
    "for kk in range(13-1):\n",
    "    for jj in range(kk+1,13-0):\n",
    "        #for ii in range(jj+1,13-1):\n",
    "            #for i in range(ii+1,13-0):\n",
    "                #for j in range(i+1,13):\n",
    "                     #for k in range(j+1,13):\n",
    "                        #for m in range(k+1,13):\n",
    "                            #for n in range(m+1,13-4):\n",
    "                                #for q in range(n+1,13-3):\n",
    "                                    #for w in range(q+1,13-2):\n",
    "                                        #for z in range(w+1,13-1):\n",
    "                                            #for y in range(z+1,13):\n",
    "                                                a=tlist[kk]+tlist[jj]#+tlist[ii]+tlist[i]#+tlist[j]+tlist[k]+tlist[m]+tlist[n]+tlist[q]+tlist[w]+tlist[z]+tlist[y]+tlist[p] \n",
    "                                                a=list(set(a))\n",
    "                                                aa=len(a)\n",
    "                                                a=flist[kk]+flist[jj]#+flist[ii]+flist[i]#+flist[j]+flist[k]+flist[m]+flist[n]+flist[q]+flist[w]+flist[z]+flist[y]+flist[p] \n",
    "                                                a=list(set(a))\n",
    "                                                bb=len(a)\n",
    "                                                TN=(num-bb)/(num*2)\n",
    "                                                TP=aa/(num*2)\n",
    "                                                FN=(num-aa)/(num*2)\n",
    "                                                FP=bb/(num*2)\n",
    "                                                F1=2*TP/(2*TP+FP+FN)\n",
    "                                                if TN+TP>=minV:\n",
    "                                                    print(kk,jj)#,ii,i,j,k,m,n,q,w,z,y,p)\n",
    "                                                    print(TN+TP)\n",
    "                                                    print(F1)\n",
    "                                                    minV=TN+TP"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 495,
   "id": "9d871ade",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "522\n",
      "10.149425287356323\n",
      "10.348659003831418\n"
     ]
    }
   ],
   "source": [
    "with open('llama_data/wice+0.9/dataset_0.9.jsonl') as file1: \n",
    "    fnum=[]\n",
    "    tnum=[]\n",
    "    knum=0\n",
    "    for line1 in file1:\n",
    "        ref1=line1.split(\"> Assistant: \")[0].split(\"Reference: \")\n",
    "        if len(ref1)==3:\n",
    "            ref1=ref1[1]+\"Reference: \"+ref1[2]\n",
    "        else:\n",
    "            ref1=ref1[-1]\n",
    "        ref1=ref1[:-8]\n",
    "        label=line1.split(\"label: \")[-1]\n",
    "        knum+=1\n",
    "        if 'true' in label: # factual\n",
    "            num2=0\n",
    "            for t in tlist:\n",
    "                if ref1 in t:\n",
    "                    num2+=1\n",
    "            tnum.append(num2)\n",
    "            #print(num,num2) \n",
    "        else:\n",
    "            num1=0\n",
    "            for f in flist:\n",
    "                if ref1 in f:\n",
    "                    num1+=1\n",
    "            fnum.append(num1)\n",
    "            #print(num,num1)\n",
    "    print(knum)\n",
    "    print(np.mean(fnum))\n",
    "    print(np.mean(tnum))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "id": "4a30c9df",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0\n",
      "0.0\n"
     ]
    }
   ],
   "source": [
    "a=t6\n",
    "res=set(a)\n",
    "a=list(res)\n",
    "print(len(a))\n",
    "aa=len(a)\n",
    "print(len(a)/num)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "id": "0efb33ce",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0\n",
      "1.0\n"
     ]
    }
   ],
   "source": [
    "a=f6\n",
    "res=set(a)\n",
    "a=list(res)\n",
    "print(len(a))\n",
    "bb=len(a)\n",
    "print((num-len(a))/num)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "id": "e4edb2e7",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "FN 0.5\n",
      "FP 0.0\n",
      "TN 0.5\n",
      "TP 0.0\n"
     ]
    }
   ],
   "source": [
    "all_num=num*2\n",
    "# FN:\n",
    "FN=(num-aa)/all_num\n",
    "print('FN',FN)\n",
    "# FP:\n",
    "FP=bb/all_num\n",
    "print('FP',FP)\n",
    "# TN:\n",
    "TN=(num-bb)/all_num\n",
    "print('TN',TN)\n",
    "# TP:\n",
    "TP=aa/all_num\n",
    "print('TP',TP)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "id": "1c4b2d3d",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.5\n"
     ]
    },
    {
     "ename": "ZeroDivisionError",
     "evalue": "float division by zero",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mZeroDivisionError\u001b[0m                         Traceback (most recent call last)",
      "Cell \u001b[1;32mIn[94], line 3\u001b[0m\n\u001b[0;32m      1\u001b[0m Accuracy\u001b[38;5;241m=\u001b[39mTN\u001b[38;5;241m+\u001b[39mTP\n\u001b[0;32m      2\u001b[0m \u001b[38;5;28mprint\u001b[39m(Accuracy)\n\u001b[1;32m----> 3\u001b[0m Precision\u001b[38;5;241m=\u001b[39m\u001b[43mTP\u001b[49m\u001b[38;5;241;43m/\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mTP\u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43mFP\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m      4\u001b[0m \u001b[38;5;28mprint\u001b[39m(Precision)\n\u001b[0;32m      5\u001b[0m Recall\u001b[38;5;241m=\u001b[39mTP\u001b[38;5;241m/\u001b[39m(TP\u001b[38;5;241m+\u001b[39mFN)\n",
      "\u001b[1;31mZeroDivisionError\u001b[0m: float division by zero"
     ]
    }
   ],
   "source": [
    "Accuracy=TN+TP\n",
    "print(Accuracy)\n",
    "Precision=TP/(TP+FP)\n",
    "print(Precision)\n",
    "Recall=TP/(TP+FN)\n",
    "print(Recall)\n",
    "F1=2*TP/(2*TP+FP+FN)\n",
    "print(F1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d66a9eef",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
