{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "bdc2d871",
   "metadata": {},
   "source": [
    "### Evaluate classifier toxicity (RTP score)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "742556a2",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "\n",
    "# Collect the `rtp_score` field of each run's rtp_summary.json.\n",
    "# Result: rtp_score[model][method] is a list with one entry per seed that loaded.\n",
    "rtp_score = {}\n",
    "\n",
    "for model in ['gemma-2-2b', 'Meta-Llama-3-8B', 'Qwen2.5-7B']:\n",
    "    rtp_score[model] = {}\n",
    "    for intervention_params in ['mean_ot', 'linear_ot', 'cluster_ot', 'pca_ot', 'identity']:\n",
    "        rtp_score[model][intervention_params] = []\n",
    "        for seed in [10, 20, 30]:\n",
    "            # NOTE(review): `model` is not part of the path, so all models read the same\n",
    "            # files and end up with identical scores -- confirm the results layout.\n",
    "            filename = f'results/CHaRS/{intervention_params}/seed{seed}/evaluate_toxicity/rtp_summary.json'\n",
    "            try:\n",
    "                with open(filename, 'r') as file:\n",
    "                    data = json.load(file)\n",
    "                rtp_score[model][intervention_params].append(data['rtp_score'])\n",
    "            except Exception as err:\n",
    "                # Best effort: a missing or malformed run is reported and skipped.\n",
    "                # Catching Exception (not a bare except) lets kernel interrupts through.\n",
    "                print(filename, 'not loaded:', repr(err))\n",
    "rtp_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a08e6b3e",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "# Print mean and standard deviation of the collected scores per (model, method).\n",
    "for model, per_method in rtp_score.items():\n",
    "    for method, scores in per_method.items():\n",
    "        print(model, method, np.mean(scores), np.std(scores))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "351edf32",
   "metadata": {},
   "source": [
    "### Evaluate zero-shot toxicity"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7309bf22",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# Collect, per run, the fraction of rows in 0shot_eval.csv whose `q0_llm_answer` is 'Yes'.\n",
    "# Result: zeroshot_score[model][method] is a list with one entry per seed that loaded.\n",
    "zeroshot_score = {}\n",
    "\n",
    "for model in ['gemma-2-2b', 'Meta-Llama-3-8B', 'Qwen2.5-7B']:\n",
    "    zeroshot_score[model] = {}\n",
    "    for intervention_params in ['mean_ot', 'linear_ot', 'cluster_ot', 'pca_ot', 'identity']:\n",
    "        zeroshot_score[model][intervention_params] = []\n",
    "        for seed in [10, 20, 30]:\n",
    "            # NOTE(review): `model` is not part of the path, so all models read the same\n",
    "            # files and end up with identical scores -- confirm the results layout.\n",
    "            filename = f'results/CHaRS/{intervention_params}/seed{seed}/evaluate_0shot/0shot_eval.csv'\n",
    "            try:\n",
    "                data = pd.read_csv(filename)\n",
    "                zeroshot_score[model][intervention_params].append((data['q0_llm_answer'] == 'Yes').mean())\n",
    "                # alt: zeroshot_score[model][intervention_params].append((data['cls_pred'] == 'toxic').mean())\n",
    "            except Exception as err:\n",
    "                # Best effort: a missing or malformed run is reported and skipped.\n",
    "                # Catching Exception (not a bare except) lets kernel interrupts through.\n",
    "                print(filename, 'not loaded:', repr(err))\n",
    "zeroshot_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b5107123",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "# Print mean and standard deviation of the collected scores per (model, method).\n",
    "for model, per_method in zeroshot_score.items():\n",
    "    for method, scores in per_method.items():\n",
    "        print(model, method, np.mean(scores), np.std(scores))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6a2b126a",
   "metadata": {},
   "source": [
    "### Evaluate PPL Wiki"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "88175272",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "\n",
    "# Collect the `perplexity-wikipedia` field of each run's rtp_summary.json.\n",
    "# Result: wiki_score[model][method] is a list with one entry per seed that loaded.\n",
    "wiki_score = {}\n",
    "\n",
    "for model in ['gemma-2-2b', 'Meta-Llama-3-8B', 'Qwen2.5-7B']:\n",
    "    wiki_score[model] = {}\n",
    "    for intervention_params in ['mean_ot', 'linear_ot', 'cluster_ot', 'pca_ot', 'identity']:\n",
    "        wiki_score[model][intervention_params] = []\n",
    "        for seed in [10, 20, 30]:\n",
    "            # NOTE(review): `model` is not part of the path, so all models read the same\n",
    "            # files and end up with identical scores -- confirm the results layout.\n",
    "            filename = f'results/CHaRS/{intervention_params}/seed{seed}/evaluate_toxicity/rtp_summary.json'\n",
    "            try:\n",
    "                with open(filename, 'r') as file:\n",
    "                    data = json.load(file)\n",
    "                wiki_score[model][intervention_params].append(data['perplexity-wikipedia'])\n",
    "            except Exception as err:\n",
    "                # Best effort: a missing or malformed run is reported and skipped.\n",
    "                # Catching Exception (not a bare except) lets kernel interrupts through.\n",
    "                print(filename, 'not loaded:', repr(err))\n",
    "wiki_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4243c758",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "# Print mean and standard deviation of the collected scores per (model, method).\n",
    "for model, per_method in wiki_score.items():\n",
    "    for method, scores in per_method.items():\n",
    "        print(model, method, np.mean(scores), np.std(scores))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5f8d1550",
   "metadata": {},
   "source": [
    "### Evaluate PPL Mistral"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6467fd3d",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# Collect, per run, the mean of the `ppl_Mistral-7B-v0.1` column of model_perplexity.csv.\n",
    "# Result: mistral_score[model][method] is a list with one entry per seed that loaded.\n",
    "mistral_score = {}\n",
    "\n",
    "for model in ['gemma-2-2b', 'Meta-Llama-3-8B', 'Qwen2.5-7B']:\n",
    "    mistral_score[model] = {}\n",
    "    for intervention_params in ['mean_ot', 'linear_ot', 'cluster_ot', 'pca_ot', 'identity']:\n",
    "        mistral_score[model][intervention_params] = []\n",
    "        for seed in [10, 20, 30]:\n",
    "            # NOTE(review): `model` is not part of the path, so all models read the same\n",
    "            # files and end up with identical scores -- confirm the results layout.\n",
    "            filename = f'results/CHaRS/{intervention_params}/seed{seed}/evaluate_perplexity/model_perplexity.csv'\n",
    "            try:\n",
    "                data = pd.read_csv(filename)\n",
    "                mistral_score[model][intervention_params].append(data['ppl_Mistral-7B-v0.1'].mean())\n",
    "            except Exception as err:\n",
    "                # Best effort: a missing or malformed run is reported and skipped.\n",
    "                # Catching Exception (not a bare except) lets kernel interrupts through.\n",
    "                print(filename, 'not loaded:', repr(err))\n",
    "mistral_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "76d4b85b",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "# Print mean and standard deviation of the collected scores per (model, method).\n",
    "for model, per_method in mistral_score.items():\n",
    "    for method, scores in per_method.items():\n",
    "        print(model, method, np.mean(scores), np.std(scores))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "93d1b3e9",
   "metadata": {},
   "source": [
    "### Evaluate MMLU accuracy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0e6eec7b",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle\n",
    "\n",
    "# Collect results -> mmlu -> 'acc,none' from each run's eleuther.pkl.\n",
    "# Result: mmlu_score[model][method] is a list with one entry per seed that loaded.\n",
    "mmlu_score = {}\n",
    "\n",
    "for model in ['gemma-2-2b', 'Meta-Llama-3-8B', 'Qwen2.5-7B']:\n",
    "    mmlu_score[model] = {}\n",
    "    for intervention_params in ['mean_ot', 'linear_ot', 'cluster_ot', 'pca_ot', 'identity']:\n",
    "        mmlu_score[model][intervention_params] = []\n",
    "        for seed in [10, 20, 30]:\n",
    "            # NOTE(review): `model` is not part of the path, so all models read the same\n",
    "            # files and end up with identical scores -- confirm the results layout.\n",
    "            filename = f'results/CHaRS/{intervention_params}/seed{seed}/evaluate_eleuther/eleuther.pkl'\n",
    "            try:\n",
    "                # pickle.load can execute arbitrary code: only open result files you produced yourself.\n",
    "                with open(filename, 'rb') as file:\n",
    "                    data = pickle.load(file)\n",
    "                mmlu_score[model][intervention_params].append(data['results']['mmlu']['acc,none'])\n",
    "            except Exception as err:\n",
    "                # Best effort: a missing or malformed run is reported and skipped.\n",
    "                # Catching Exception (not a bare except) lets kernel interrupts through.\n",
    "                print(filename, 'not loaded:', repr(err))\n",
    "mmlu_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d890dc63",
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "# Print mean and standard deviation of the collected scores per (model, method).\n",
    "for model, per_method in mmlu_score.items():\n",
    "    for method, scores in per_method.items():\n",
    "        print(model, method, np.mean(scores), np.std(scores))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9fab3c5b",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "nonparam_steering_env311 (3.11.14)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
