{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "74e7fb8d-7415-41d3-980a-84e46016f2bd",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle\n",
    "import os\n",
    "import numpy as np\n",
    "import scipy\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "cd138468-a11b-42c9-a23c-de41c762bac7",
   "metadata": {},
   "outputs": [],
   "source": [
    "model_list = [\n",
    "                \"facebook/opt-1.3b\",\n",
    "                \"meta-llama/Meta-Llama-3-8B\",\n",
    "                \"mistralai/Mistral-7B-v0.1\",\n",
    "                ]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "b427c1ae-2d36-4b11-802a-e27ae2a7ef95",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "facebook/opt-1.3b -12.357440952195484\n",
      "meta-llama/Meta-Llama-3-8B -5.257429582691421\n",
      "mistralai/Mistral-7B-v0.1 -5.4396624359856505\n"
     ]
    }
   ],
   "source": [
    "for model_name1 in model_list:\n",
    "    base_path = './'\n",
    "    data_name = 'triviaqa'\n",
    "\n",
    "    save_result_path = \"{}/result/{}/{}\".format(base_path, data_name, model_name1)\n",
    "    os.makedirs(save_result_path, exist_ok=True) \n",
    "    save_file_name = f\"{save_result_path}/log_likelihood_list.pt\"\n",
    "    with open(save_file_name, 'rb') as f:\n",
    "        log_likelihood_list1 = pickle.load(f)\n",
    "    log_likelihood_list1 = np.array(log_likelihood_list1)\n",
    "    print(model_name1, np.mean(log_likelihood_list1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5d6a5a08-8a82-4b4b-9cfe-4016eff2fade",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "6397f0ba-75e0-4ebb-a351-cf85db279a45",
   "metadata": {},
   "outputs": [],
   "source": [
    "base_path = './'\n",
    "\n",
    "data_name = 'triviaqa'\n",
    "model_name1 = \"meta-llama/Meta-Llama-3-8B\"\n",
    "\n",
    "save_result_path = \"{}/result/{}/{}\".format(base_path, data_name, model_name1)\n",
    "os.makedirs(save_result_path, exist_ok=True) \n",
    "save_file_name = f\"{save_result_path}/log_likelihood_list.pt\"\n",
    "with open(save_file_name, 'rb') as f:\n",
    "    log_likelihood_list1 = pickle.load(f)\n",
    "log_likelihood_list1 = np.array(log_likelihood_list1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "111f2754-1591-406b-9e80-68b1b3d88b8f",
   "metadata": {},
   "outputs": [],
   "source": [
    "base_path = './'\n",
    "\n",
    "data_name = 'triviaqa'\n",
    "model_name2 = \"mistralai/Mistral-7B-v0.1\"\n",
    "\n",
    "\n",
    "save_result_path = \"{}/result/{}/{}\".format(base_path, data_name, model_name2)\n",
    "os.makedirs(save_result_path, exist_ok=True) \n",
    "save_file_name = f\"{save_result_path}/log_likelihood_list.pt\"\n",
    "with open(save_file_name, 'rb') as f:\n",
    "    log_likelihood_list2 = pickle.load(f)\n",
    "log_likelihood_list2 = np.array(log_likelihood_list2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "13ee9006-18e0-466a-b81a-f030246cae39",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "alpha = 0.1\n",
    "z_alpha = scipy.stats.norm.ppf(1 - alpha, loc=0, scale=1)\n",
    "z_half_alpha =  scipy.stats.norm.ppf(1 - alpha / 2, loc=0, scale=1)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "f5735df6-4f0b-48d4-979e-d0416d302a97",
   "metadata": {},
   "outputs": [],
   "source": [
    "temp_mean = (-log_likelihood_list1 + log_likelihood_list2).mean()\n",
    "temp_std = (-log_likelihood_list1 + log_likelihood_list2).std()\n",
    "temp_lower = temp_mean - z_half_alpha * temp_std / np.sqrt(len(log_likelihood_list1))\n",
    "temp_upper = temp_mean + z_half_alpha * temp_std / np.sqrt(len(log_likelihood_list1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "a495d196-1477-45cf-9385-b013f96b23c2",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-0.18 (-0.22, -0.14)\n"
     ]
    }
   ],
   "source": [
    "print(f\"{temp_mean:.2f} ({temp_lower:.2f}, {temp_upper:.2f})\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fbeff652-b3f0-4654-8fb9-8a17e2d87830",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a6c01b3e-a66f-495e-8cb5-87f41bfa63c7",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "myenv",
   "language": "python",
   "name": "myenv"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.19"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
