{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Install the table formatter that the reporting cells import.\n",
    "# NOTE(review): the version is not pinned; pin it once a known-good release is chosen.\n",
    "%pip install tabulate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd\n",
    "from tabulate import tabulate\n",
    "from tqdm import tqdm\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the pipe-delimited pattern search results into a DataFrame.\n",
    "ipi_da_df = pd.read_csv(\n",
    "    'results/pattern_search_results.txt',\n",
    "    sep='|',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Mean of the 'Shell' column for each orchestrator / input type / error type.\n",
    "(\n",
    "    ipi_da_df\n",
    "    .groupby(['Orchestrator', 'Input Type', 'Error Type'])\n",
    "    .agg({'Shell': 'mean'})\n",
    "    .reset_index()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "col = 'Refusal'\n",
    "# Keep only the direct-ask rows, then average the chosen column per model.\n",
    "direct_ask_mask = ipi_da_df['Input Type'].eq('direct-ask')\n",
    "filtered_df = ipi_da_df.loc[direct_ask_mask]\n",
    "mean_by_model = filtered_df.groupby(['Model', 'Input Type'])[col].mean()\n",
    "print(mean_by_model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# AutoGen results: refusal rates, attack success rates (ASR) and LaTeX tables.\n",
    "# The trial log is pipe-delimited; the code below treats each row as one trial.\n",
    "autogen_df = pd.read_csv('results/autogen/all_trials.csv', sep='|')\n",
    "orchestrators = autogen_df['Orchestrator'].unique()\n",
    "\n",
    "MODELS = ['gpt-4o', 'gpt-4o-mini', 'gemini-1.5-pro', 'gemini-1.5-flash']\n",
    "# Error-type prefixes, in the column order of the refusal table.\n",
    "IPI_PREFIXES = ['ipi1', 'ipi2', 'ipi3']\n",
    "# Orchestrator name -> short row label used in the LaTeX tables.\n",
    "ROW_LABELS = {'magentic-one': 'MO', 'selector': 'Sel.', 'round-robin': 'RR'}\n",
    "\n",
    "# Prepare data for the refusal rates table\n",
    "refusal_table_data = []\n",
    "for orchestrator in orchestrators:\n",
    "    orchestrator_df = autogen_df[autogen_df['Orchestrator'] == orchestrator]\n",
    "    ipi_df = orchestrator_df[orchestrator_df['Input Type'].isin(['ipi-web-text', 'ipi-local-text'])]\n",
    "    da_df = orchestrator_df[orchestrator_df['Input Type'] == 'direct-ask']\n",
    "    refusal_rates = [\n",
    "        ipi_df[ipi_df['Error Type'].str.startswith(prefix, na=False)]['Refusal'].mean()\n",
    "        for prefix in IPI_PREFIXES\n",
    "    ]\n",
    "    refusal_rates += [ipi_df['Refusal'].mean(), da_df['Refusal'].mean()]\n",
    "    # A slice with no rows has a NaN mean, which int() rejects; report it as n/a.\n",
    "    refusal_table_data.append(\n",
    "        [orchestrator] + ['n/a' if pd.isna(rate) else f\"{int(rate * 100)}%\" for rate in refusal_rates]\n",
    "    )\n",
    "# Print the refusal rates table\n",
    "print(tabulate(refusal_table_data, headers=[\"Orchestrator\", \"Ignore Previous\", \"InjecAgent\", \"AgentDojo DOS\", \"Avg. IPI\", \"DA\"], tablefmt=\"latex\"))\n",
    "\n",
    "\n",
    "# Prepare data for the first table: successes / trials for three input types\n",
    "table1_data = []\n",
    "for orchestrator in orchestrators:\n",
    "    print(orchestrator)\n",
    "    orchestrator_df = autogen_df[autogen_df['Orchestrator'] == orchestrator]\n",
    "    asr_row = [orchestrator]\n",
    "    for input_type in ['web-text-redirect', 'ipi-web-text', 'direct-ask']:\n",
    "        # Successes and trial count come from the same 'Input Type' slice, so the\n",
    "        # ratio is a true rate (the IPI successes used to be filtered on 'Error Type').\n",
    "        trials = orchestrator_df[orchestrator_df['Input Type'] == input_type]\n",
    "        asr_row.append(trials['Success'].sum() / len(trials) if len(trials) else float('nan'))\n",
    "    table1_data.append(asr_row)\n",
    "\n",
    "# Print the first table\n",
    "print(tabulate(table1_data, headers=[\"Orchestrator\", \"Avg. ASR\", \"IPI ASR\", \"DA ASR\"], tablefmt=\"latex\"))\n",
    "\n",
    "# Prepare data for the second table: per-model local-text rates, then the\n",
    "# web-text-redirect ('Avg. ASR'), ipi-web-text and direct-ask rates over all models\n",
    "table2_data = []\n",
    "for orchestrator in orchestrators:\n",
    "    orchestrator_df = autogen_df[autogen_df['Orchestrator'] == orchestrator]\n",
    "    local_df = orchestrator_df[orchestrator_df['Input Type'] == 'local-text']\n",
    "    per_model = [round(local_df[local_df['Model'] == model]['Success'].mean(), 3) for model in MODELS]\n",
    "    overall = [\n",
    "        round(orchestrator_df[orchestrator_df['Input Type'] == input_type]['Success'].mean(), 3)\n",
    "        for input_type in ['web-text-redirect', 'ipi-web-text', 'direct-ask']\n",
    "    ]\n",
    "    table2_data.append([orchestrator] + per_model + overall)\n",
    "\n",
    "# Print the second table\n",
    "print(tabulate(table2_data, headers=[\"Orch.\", \"4o\", \"4o-mini\", \"Gem-1.5-Pro\", \"Gem-1.5-Flash\", \"Avg. ASR\", \"IPI\", \"DA\"], tablefmt=\"latex\"))\n",
    "\n",
    "# Prepare data for the third table: success rate for every\n",
    "# orchestrator / input type / model combination (NaN when there are no trials)\n",
    "input_types = autogen_df['Input Type'].unique()\n",
    "model_names = autogen_df['Model'].unique()\n",
    "table3_data = {}\n",
    "for orchestrator in orchestrators:\n",
    "    table3_data[orchestrator] = {}\n",
    "    for input_type in input_types:\n",
    "        table3_data[orchestrator][input_type] = {}\n",
    "        for model in model_names:\n",
    "            cell_df = autogen_df[(autogen_df['Orchestrator'] == orchestrator) & (autogen_df['Input Type'] == input_type) & (autogen_df['Model'] == model)]\n",
    "            table3_data[orchestrator][input_type][model] = cell_df['Success'].sum() / len(cell_df) if len(cell_df) else float('nan')\n",
    "\n",
    "# Each LaTeX table shows two input types side by side, one column per model.\n",
    "# Every title is paired with the input type it describes so the header cannot\n",
    "# drift away from the data columns.\n",
    "latex_tables = [\n",
    "    [('web-text-redirect', 'Web Text Redirect'), ('local-text', 'Local File')],\n",
    "    [('web-text-single-file', 'Web Text Single File'), ('web-image', 'Web Image')],\n",
    "]\n",
    "for column_groups in latex_tables:\n",
    "    latex_lines = [\n",
    "        r'\\begin{table}[h]',\n",
    "        r'    \\centering',\n",
    "        r'    \\begin{tabular}{llcccccccc}',\n",
    "        r'        \\toprule',\n",
    "        '        & & ' + ' & '.join(r'\\multicolumn{4}{c}{\\textbf{' + title + '}}' for _, title in column_groups) + r' \\\\',\n",
    "        r'        \\textbf{} & \\textbf{Orch.} & ' + ' & '.join(['4o', 'mini', 'pro', 'flash'] * len(column_groups)) + r' \\\\',\n",
    "        r'        \\midrule',\n",
    "        r'        \\multirow{' + str(len(ROW_LABELS)) + '}{*}{AG}',\n",
    "    ]\n",
    "    for orchestrator_name, row_label in ROW_LABELS.items():\n",
    "        cells = [\n",
    "            f\"{table3_data[orchestrator_name][input_type][model]:.3f}\"\n",
    "            for input_type, _ in column_groups\n",
    "            for model in MODELS\n",
    "        ]\n",
    "        latex_lines.append(f'        & {row_label} & ' + ' & '.join(cells) + r' \\\\')\n",
    "    latex_lines += [r'\\end{tabular}', r'\\end{table}']\n",
    "    print('\\n'.join(latex_lines))\n",
    "\n",
    "# Prepare data for the fourth table: mean success per input type over all models\n",
    "table4_data = []\n",
    "for orchestrator in orchestrators:\n",
    "    orchestrator_df = autogen_df[autogen_df['Orchestrator'] == orchestrator]\n",
    "    table4_data.append([orchestrator] + [\n",
    "        round(orchestrator_df[orchestrator_df['Input Type'] == input_type]['Success'].mean(), 3)\n",
    "        for input_type in ['local-text', 'web-text-redirect', 'web-text-single-file', 'web-image']\n",
    "    ])\n",
    "\n",
    "# Print the fourth table\n",
    "print(tabulate(table4_data, headers=[\"Orchestrator\", \"Local\", \"Web Redirect\", \"Web (Single)\", \"Web Image\"], tablefmt=\"latex\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# MetaGPT results: refusal rates, attack success rates (ASR) and LaTeX tables.\n",
    "# The trial log is pipe-delimited; the code below treats each row as one trial.\n",
    "metagpt_df = pd.read_csv('results/metagpt/all_trials.csv', sep='|')\n",
    "# Rows where Success, Unsafe Code ID and Refusal are all 1. This filter used to be\n",
    "# computed and thrown away; it is kept in a name so it can be inspected.\n",
    "success_and_refusal_df = metagpt_df[(metagpt_df['Success']==1) & (metagpt_df['Unsafe Code ID']==1) & (metagpt_df['Refusal']==1)]\n",
    "\n",
    "orchestrators = metagpt_df['Orchestrator'].unique()\n",
    "\n",
    "MODELS = ['gpt-4o', 'gpt-4o-mini', 'gemini-1.5-pro', 'gemini-1.5-flash']\n",
    "# Error-type prefixes, in the column order of the refusal table.\n",
    "IPI_PREFIXES = ['ipi1', 'ipi2', 'ipi3']\n",
    "# Orchestrator name -> short row label used in the LaTeX table.\n",
    "ROW_LABELS = {'metagpt': 'Def.'}\n",
    "\n",
    "# Prepare data for the refusal rates table\n",
    "refusal_table_data = []\n",
    "for orchestrator in orchestrators:\n",
    "    orchestrator_df = metagpt_df[metagpt_df['Orchestrator'] == orchestrator]\n",
    "    ipi_df = orchestrator_df[orchestrator_df['Input Type'].isin(['ipi-web-text', 'ipi-local-text'])]\n",
    "    da_df = orchestrator_df[orchestrator_df['Input Type'] == 'direct-ask']\n",
    "    refusal_rates = [\n",
    "        ipi_df[ipi_df['Error Type'].str.startswith(prefix, na=False)]['Refusal'].mean()\n",
    "        for prefix in IPI_PREFIXES\n",
    "    ]\n",
    "    refusal_rates += [ipi_df['Refusal'].mean(), da_df['Refusal'].mean()]\n",
    "    # A slice with no rows has a NaN mean, which int() rejects; report it as n/a.\n",
    "    refusal_table_data.append(\n",
    "        [orchestrator] + ['n/a' if pd.isna(rate) else f\"{int(rate * 100)}%\" for rate in refusal_rates]\n",
    "    )\n",
    "# Print the refusal rates table\n",
    "print(tabulate(refusal_table_data, headers=[\"Orchestrator\", \"Ignore Previous\", \"InjecAgent\", \"AgentDojo DOS\", \"Avg. IPI\", \"DA\"], tablefmt=\"latex\"))\n",
    "\n",
    "\n",
    "# Prepare data for the first table: successes / trials for three input types\n",
    "table1_data = []\n",
    "for orchestrator in orchestrators:\n",
    "    print(orchestrator)\n",
    "    orchestrator_df = metagpt_df[metagpt_df['Orchestrator'] == orchestrator]\n",
    "    asr_row = [orchestrator]\n",
    "    for input_type in ['web-text-redirect', 'ipi-web-text', 'direct-ask']:\n",
    "        # Successes and trial count come from the same 'Input Type' slice, so the\n",
    "        # ratio is a true rate (the IPI successes used to be filtered on 'Error Type').\n",
    "        trials = orchestrator_df[orchestrator_df['Input Type'] == input_type]\n",
    "        asr_row.append(trials['Success'].sum() / len(trials) if len(trials) else float('nan'))\n",
    "    table1_data.append(asr_row)\n",
    "\n",
    "# Print the first table\n",
    "print(tabulate(table1_data, headers=[\"Orchestrator\", \"Avg. ASR\", \"IPI ASR\", \"DA ASR\"], tablefmt=\"latex\"))\n",
    "\n",
    "# Prepare data for the second table: per-model local-text rates, then the\n",
    "# web-text-redirect ('Avg. ASR'), ipi-web-text and direct-ask rates over all models\n",
    "table2_data = []\n",
    "for orchestrator in orchestrators:\n",
    "    orchestrator_df = metagpt_df[metagpt_df['Orchestrator'] == orchestrator]\n",
    "    local_df = orchestrator_df[orchestrator_df['Input Type'] == 'local-text']\n",
    "    per_model = [round(local_df[local_df['Model'] == model]['Success'].mean(), 3) for model in MODELS]\n",
    "    overall = [\n",
    "        round(orchestrator_df[orchestrator_df['Input Type'] == input_type]['Success'].mean(), 3)\n",
    "        for input_type in ['web-text-redirect', 'ipi-web-text', 'direct-ask']\n",
    "    ]\n",
    "    table2_data.append([orchestrator] + per_model + overall)\n",
    "\n",
    "# Print the second table\n",
    "print(tabulate(table2_data, headers=[\"Orch.\", \"4o\", \"4o-mini\", \"Gem-1.5-Pro\", \"Gem-1.5-Flash\", \"Avg. ASR\", \"IPI\", \"DA\"], tablefmt=\"latex\"))\n",
    "\n",
    "# Prepare data for the third table: success rate for every\n",
    "# orchestrator / input type / model combination (NaN when there are no trials)\n",
    "input_types = metagpt_df['Input Type'].unique()\n",
    "model_names = metagpt_df['Model'].unique()\n",
    "table3_data = {}\n",
    "for orchestrator in orchestrators:\n",
    "    table3_data[orchestrator] = {}\n",
    "    for input_type in input_types:\n",
    "        table3_data[orchestrator][input_type] = {}\n",
    "        for model in model_names:\n",
    "            cell_df = metagpt_df[(metagpt_df['Orchestrator'] == orchestrator) & (metagpt_df['Input Type'] == input_type) & (metagpt_df['Model'] == model)]\n",
    "            table3_data[orchestrator][input_type][model] = cell_df['Success'].sum() / len(cell_df) if len(cell_df) else float('nan')\n",
    "\n",
    "# The LaTeX table shows two input types side by side, one column per model.\n",
    "# Every title is paired with the input type it describes so the header cannot\n",
    "# drift away from the data columns.\n",
    "column_groups = [('web-text-redirect', 'Web Text Redirect'), ('local-text', 'Local File')]\n",
    "latex_lines = [\n",
    "    r'\\begin{table}[h]',\n",
    "    r'    \\centering',\n",
    "    r'    \\begin{tabular}{llcccccccc}',\n",
    "    r'        \\toprule',\n",
    "    '        & & ' + ' & '.join(r'\\multicolumn{4}{c}{\\textbf{' + title + '}}' for _, title in column_groups) + r' \\\\',\n",
    "    r'        \\textbf{} & \\textbf{Orch.} & ' + ' & '.join(['4o', 'mini', 'pro', 'flash'] * len(column_groups)) + r' \\\\',\n",
    "    r'        \\midrule',\n",
    "    # NOTE(review): the group label 'AG' is unchanged from the original cell; confirm\n",
    "    # it is the label wanted for the MetaGPT rows.\n",
    "    r'        \\multirow{' + str(len(ROW_LABELS)) + '}{*}{AG}',\n",
    "]\n",
    "for orchestrator_name, row_label in ROW_LABELS.items():\n",
    "    cells = [\n",
    "        f\"{table3_data[orchestrator_name][input_type][model]:.3f}\"\n",
    "        for input_type, _ in column_groups\n",
    "        for model in MODELS\n",
    "    ]\n",
    "    latex_lines.append(f'        & {row_label} & ' + ' & '.join(cells) + r' \\\\')\n",
    "latex_lines += [r'\\end{tabular}', r'\\end{table}']\n",
    "print('\\n'.join(latex_lines))\n",
    "\n",
    "# Prepare data for the fourth table: mean success per input type over all models\n",
    "table4_data = []\n",
    "for orchestrator in orchestrators:\n",
    "    orchestrator_df = metagpt_df[metagpt_df['Orchestrator'] == orchestrator]\n",
    "    table4_data.append([orchestrator] + [\n",
    "        round(orchestrator_df[orchestrator_df['Input Type'] == input_type]['Success'].mean(), 3)\n",
    "        for input_type in ['local-text', 'web-text-redirect']\n",
    "    ])\n",
    "\n",
    "# Print the fourth table\n",
    "print(tabulate(table4_data, headers=[\"Orchestrator\", \"Local\", \"Web Redirect\"], tablefmt=\"latex\"))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Analysis of Success in Local and Web-Redirect by Error Type\n",
    "\n",
    "# NOTE(review): this cell used to read `df`, which no cell in the notebook defines.\n",
    "# The AutoGen trial log is loaded here as the presumed dataset -- confirm before\n",
    "# relying on these numbers.\n",
    "df = pd.read_csv('results/autogen/all_trials.csv', sep='|')\n",
    "\n",
    "# Group the data by 'Input Type' and 'Error Type' and calculate the mean success rate\n",
    "grouped_data = df.groupby(['Input Type', 'Error Type'])['Success'].mean().reset_index()\n",
    "\n",
    "# Separate the data for local-text and web-text-redirect\n",
    "local_text_data = grouped_data[grouped_data['Input Type'] == 'local-text']\n",
    "web_text_redirect_data = grouped_data[grouped_data['Input Type'] == 'web-text-redirect']\n",
    "\n",
    "# Print the analysis\n",
    "print(\"Analysis of Success in Local Text by Error Type:\")\n",
    "print(tabulate(local_text_data, headers=[\"Input Type\", \"Error Type\", \"Mean Success Rate\"], tablefmt=\"grid\"))\n",
    "\n",
    "print(\"\\nAnalysis of Success in Web Text Redirect by Error Type:\")\n",
    "print(tabulate(web_text_redirect_data, headers=[\"Input Type\", \"Error Type\", \"Mean Success Rate\"], tablefmt=\"grid\"))\n",
    "\n",
    "# Plot the analysis: one bar chart per input type, each on its own figure\n",
    "for plot_data, bar_color, plot_title in [\n",
    "    (local_text_data, 'blue', 'Mean Success Rate for Local Text by Error Type'),\n",
    "    (web_text_redirect_data, 'green', 'Mean Success Rate for Web Text Redirect by Error Type'),\n",
    "]:\n",
    "    fig, ax = plt.subplots(figsize=(10, 5))\n",
    "    ax.bar(plot_data['Error Type'], plot_data['Success'], color=bar_color)\n",
    "    ax.set(xlabel='Error Type', ylabel='Mean Success Rate', title=plot_title)\n",
    "    plt.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Success by error type for local-text and web-text-redirect, leaving out gemini-1.5-flash\n",
    "\n",
    "# Drop every row whose model is gemini-1.5-flash\n",
    "filtered_df = df.loc[df['Model'].ne('gemini-1.5-flash')]\n",
    "\n",
    "# Mean success rate for each (input type, error type) pair of the remaining rows\n",
    "success_by_group = filtered_df.groupby(['Input Type', 'Error Type'])['Success']\n",
    "grouped_data = success_by_group.mean().reset_index()\n",
    "\n",
    "# Split the summary into the two input types being compared\n",
    "input_type_column = grouped_data['Input Type']\n",
    "local_text_data = grouped_data.loc[input_type_column.eq('local-text')]\n",
    "web_text_redirect_data = grouped_data.loc[input_type_column.eq('web-text-redirect')]\n",
    "\n",
    "# Print one grid table per input type\n",
    "report_headers = [\"Input Type\", \"Error Type\", \"Mean Success Rate\"]\n",
    "for heading, summary in (\n",
    "    (\"Analysis of Success in Local Text by Error Type (excluding gemini-1.5-flash):\", local_text_data),\n",
    "    (\"\\nAnalysis of Success in Web Text Redirect by Error Type (excluding gemini-1.5-flash):\", web_text_redirect_data),\n",
    "):\n",
    "    print(heading)\n",
    "    print(tabulate(summary, headers=report_headers, tablefmt=\"grid\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def find_completion(\n",
    "    target_path=\"web-text-redirect/selector/gemini-1.5-pro/wordpress/query_2_trial_10.txt\",\n",
    "    batch_path='results/autogen/batch_output.jsonl',\n",
    "):\n",
    "    \"\"\"Print the analysis recorded for one trial file in a JSONL batch output.\n",
    "\n",
    "    The file is scanned line by line. The first record whose request messages\n",
    "    mention ``target_path`` and whose response holds a non-empty assistant\n",
    "    message is used: that message is parsed as JSON and printed one\n",
    "    ``key: value`` per line, or printed raw when it is not valid JSON.\n",
    "\n",
    "    Args:\n",
    "        target_path: Text to look for in the string form of each request message.\n",
    "        batch_path: JSONL file to scan, one JSON object per line.\n",
    "\n",
    "    Returns:\n",
    "        None. Results, read errors and a miss are all reported with ``print``.\n",
    "    \"\"\"\n",
    "    try:\n",
    "        with open(batch_path, 'r', encoding='utf-8') as f:\n",
    "            for line in f:\n",
    "                if not line.strip():\n",
    "                    continue  # tolerate blank lines between records\n",
    "                result = json.loads(line)\n",
    "                if not isinstance(result, dict):\n",
    "                    continue\n",
    "                # `or {}` / `or []` also covers keys that are present but null,\n",
    "                # which previously aborted the whole scan.\n",
    "                request_body = (result.get('request') or {}).get('body') or {}\n",
    "                messages = request_body.get('messages') or []\n",
    "                if not any(target_path in str(msg) for msg in messages):\n",
    "                    continue\n",
    "                response_body = (result.get('response') or {}).get('body') or {}\n",
    "                for choice in response_body.get('choices') or []:\n",
    "                    message = choice.get('message') or {}\n",
    "                    content = message.get('content')\n",
    "                    if message.get('role') != 'assistant' or not content:\n",
    "                        continue\n",
    "                    try:\n",
    "                        analysis = json.loads(content)\n",
    "                    except json.JSONDecodeError:\n",
    "                        print(\"Error parsing JSON content\")\n",
    "                        print(\"Raw content:\", content)\n",
    "                        return\n",
    "                    print(\"Analysis for\", target_path)\n",
    "                    print(\"-\" * 80)\n",
    "                    if isinstance(analysis, dict):\n",
    "                        for key, value in analysis.items():\n",
    "                            print(f\"{key}: {value}\")\n",
    "                    else:\n",
    "                        # Valid JSON that is not an object has no keys to list.\n",
    "                        print(analysis)\n",
    "                    return\n",
    "    except (OSError, ValueError) as e:\n",
    "        # OSError: missing or unreadable file. ValueError: a malformed JSON line or bad encoding.\n",
    "        print(f\"Error reading file: {e}\")\n",
    "        return\n",
    "    print(f\"No completion found for {target_path}\")\n",
    "\n",
    "find_completion()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "autogen_venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
