{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Improve Labeler Strategy\n",
    "This Jupyter notebook focuses on improving a labeler strategy by updating one of the bob's weights. It explores two approaches for adjusting weights: the logistic method and the influence method, evaluating their impact on cosine similarity and accuracy. The notebook compares these strategies to improve the performance of a model, using labeled datasets for validation. ​"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/anaconda3/envs/if_rlhf/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n",
      "/opt/anaconda3/envs/if_rlhf/lib/python3.10/site-packages/transformers/utils/generic.py:441: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead.\n",
      "  _torch_pytree._register_pytree_node(\n",
      "/opt/anaconda3/envs/if_rlhf/lib/python3.10/site-packages/transformers/utils/generic.py:309: FutureWarning: `torch.utils._pytree._register_pytree_node` is deprecated. Please use `torch.utils._pytree.register_pytree_node` instead.\n",
      "  _torch_pytree._register_pytree_node(\n",
      "/var/folders/5z/p7v97k1552s5tjbjstsc6whm0000gn/T/ipykernel_62638/2219467105.py:18: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
      "  rapid_grad_train = torch.load(f\"{model_path}/rapid_grad_train.pt\")[D]\n",
      "/var/folders/5z/p7v97k1552s5tjbjstsc6whm0000gn/T/ipykernel_62638/2219467105.py:19: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.\n",
      "  rapid_grad_val = torch.load(f\"{model_path}/rapid_grad_val.pt\")[D]\n"
     ]
    }
   ],
   "source": [
    "import datasets\n",
    "import numpy as np\n",
    "from sklearn import svm\n",
    "import torch\n",
    "import utils.influence as utils\n",
    "D = 65536 # 2^16, size of rapid grad\n",
    "\n",
    "# Set device\n",
    "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n",
    "\n",
    "_WORK_PATH = \"../..\"\n",
    "train_data_path = _WORK_PATH + \"/dataset/train/alice_bob\"\n",
    "val_data_path = _WORK_PATH + \"/dataset/val/alice_bob\"\n",
    "model_path = _WORK_PATH + \"/dataset/model/alice_bob\"\n",
    "val_dataset_alice = datasets.load_from_disk(val_data_path)\n",
    "train_dataset_bob = datasets.load_from_disk(train_data_path)\n",
    "\n",
    "rapid_grad_train = torch.load(f\"{model_path}/rapid_grad_train.pt\")[D]\n",
    "rapid_grad_val = torch.load(f\"{model_path}/rapid_grad_val.pt\")[D]\n",
    "flipped_indices = np.load(f\"{train_data_path}/flipped_indices.npy\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Compute Cosine Similarity and Accuracy using Logistic Regression"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "\n",
    "\n",
    "def compute_cs_and_accuracy_using_logistic(weight_alice, weight_bob, eval_dataset_alice, train_dataset_bob):\n",
    "    X = []\n",
    "    y = []\n",
    "    for example in eval_dataset_alice:\n",
    "        score_0 = np.array([example['correctness_0'], example['coherence_0'], example['complexity_0'], example['verbosity_0']])\n",
    "        score_1 = np.array([example['correctness_1'], example['coherence_1'], example['complexity_1'], example['verbosity_1']])\n",
    "        \n",
    "        # Calculate Alice's scores for both responses\n",
    "        score_0_alice = np.dot(weight_alice, score_0)\n",
    "        score_1_alice = np.dot(weight_alice, score_1)\n",
    "        \n",
    "        # Determine the preferred response and construct the feature vectors\n",
    "        if score_0_alice > score_1_alice:\n",
    "            X.append(score_0)\n",
    "            y.append(1)  # score_0 is preferred\n",
    "            X.append(score_1)\n",
    "            y.append(0)  # score_1 is not preferred\n",
    "        else:\n",
    "            X.append(score_1)\n",
    "            y.append(1)  # score_1 is preferred\n",
    "            X.append(score_0)\n",
    "            y.append(0)  # score_0 is not preferred\n",
    "\n",
    "    X = np.array(X)\n",
    "    y = np.array(y)\n",
    "\n",
    "    # Standardize the features before applying logistic regression\n",
    "    scaler = StandardScaler()\n",
    "    X_scaled = scaler.fit_transform(X)\n",
    "\n",
    "    # Perform logistic regression to get Bob's new weights\n",
    "    model = LogisticRegression()\n",
    "    model.fit(X_scaled, y)\n",
    "\n",
    "    # Get the coefficients (these are Bob's new weights)\n",
    "    new_weight_bob = model.coef_[0]\n",
    "\n",
    "    # Calculate the cosine similarity between Alice's weight and Bob's new weight\n",
    "    weight_alice /= np.linalg.norm(weight_alice)\n",
    "    weight_bob /= np.linalg.norm(weight_bob)\n",
    "\n",
    "    cosine_similarity_with_model = np.dot(model.coef_[0], weight_alice) / (np.linalg.norm(model.coef_[0]) * np.linalg.norm(weight_alice))\n",
    "\n",
    "    cnt_update = 0\n",
    "    for example in train_dataset_bob:\n",
    "        score_0 = np.array([example['correctness_0'], example['coherence_0'], example['complexity_0'], example['verbosity_0']])\n",
    "        score_1 = np.array([example['correctness_1'], example['coherence_1'], example['complexity_1'], example['verbosity_1']])\n",
    "        score_0_bob_after = np.dot(model.coef_[0], score_0)\n",
    "        score_1_bob_after = np.dot(model.coef_[0], score_1)\n",
    "        score_0_alice = np.dot(weight_alice, score_0)\n",
    "        score_1_alice = np.dot(weight_alice, score_1)\n",
    "        if (score_0_bob_after - score_1_bob_after) * (score_0_alice - score_1_alice) > 0:\n",
    "            cnt_update += 1\n",
    "            \n",
    "    acc_after = cnt_update / len(train_dataset_bob)\n",
    "\n",
    "    return new_weight_bob, cosine_similarity_with_model, acc_after"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Compute Cosine Similarity and Accuracy using Influence"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def compute_cs_and_accuracy_using_influence(\n",
    "    weight_alice, weight_bob, train_dataset_bob, influence, ratio=None, threshold=None\n",
    "):\n",
    "    # Determine the threshold based on ratio or use the provided threshold\n",
    "    if ratio is not None:\n",
    "        # Calculate threshold based on ratio\n",
    "        threshold = np.percentile(influence, 100 - ratio * 100)\n",
    "        print(f\"Using Ratio: {ratio}\")\n",
    "    elif threshold is None:\n",
    "        raise ValueError(\"Either ratio or threshold must be provided.\")\n",
    "    else:\n",
    "        print(f\"Using Threshold: {threshold}\")\n",
    "    # Determine negative indices\n",
    "    negative_indices = [i for i in range(len(influence)) if influence[i] > threshold]\n",
    "    # Normalize weight vectors\n",
    "    weight_alice /= np.linalg.norm(weight_alice)\n",
    "    weight_bob /= np.linalg.norm(weight_bob)\n",
    "    \n",
    "    # Initialize lists for score differences and labels\n",
    "    score_diffs = []\n",
    "    y_labels_inf = []\n",
    "    cnt = 0\n",
    "    # Iterate through the training dataset\n",
    "    for id, example in enumerate(train_dataset_bob):\n",
    "        score_0 = np.array([example['correctness_0'], example['coherence_0'], example['complexity_0'], example['verbosity_0']])\n",
    "        score_1 = np.array([example['correctness_1'], example['coherence_1'], example['complexity_1'], example['verbosity_1']])\n",
    "        \n",
    "        score_0_alice = np.dot(weight_alice, score_0)\n",
    "        score_1_alice = np.dot(weight_alice, score_1)\n",
    "        score_0_bob = np.dot(weight_bob, score_0)\n",
    "        score_1_bob = np.dot(weight_bob, score_1)\n",
    "        if (score_0_alice - score_1_alice) * (score_0_bob - score_1_bob) > 0:\n",
    "            cnt += 1\n",
    "        # Determine preference based on Bob's scores\n",
    "        preference_bob = 1 if score_1_bob > score_0_bob else 0\n",
    "        # Calculate score difference\n",
    "        score_diff = (score_1 - score_0) * (2 * preference_bob - 1)\n",
    "        score_diffs.append(score_diff)\n",
    "        \n",
    "        # Assign labels based on negative indices\n",
    "        if id in negative_indices:\n",
    "            y_labels_inf.append(0)\n",
    "        else:\n",
    "            y_labels_inf.append(1)\n",
    "    \n",
    "    # Convert lists to numpy arrays\n",
    "    score_diffs = np.array(score_diffs)\n",
    "    y_labels_inf = np.array(y_labels_inf)\n",
    "    \n",
    "    # Train the SVM classifier\n",
    "    classifier = svm.SVC(kernel='linear')\n",
    "    classifier.fit(score_diffs, y_labels_inf)\n",
    "    \n",
    "    # Calculate cosine similarity between weight vectors\n",
    "    cosine_similarity_with_bob = np.dot(weight_bob, weight_alice)\n",
    "    cosine_similarity_with_classifier = np.dot(classifier.coef_, weight_alice) / (np.linalg.norm(classifier.coef_) * np.linalg.norm(weight_alice))\n",
    "    \n",
    "    # use classifier.coef_ to measure accuracy \n",
    "    cnt_update = 0\n",
    "    for example in train_dataset_bob:\n",
    "        score_0 = np.array([example['correctness_0'], example['coherence_0'], example['complexity_0'], example['verbosity_0']])\n",
    "        score_1 = np.array([example['correctness_1'], example['coherence_1'], example['complexity_1'], example['verbosity_1']])\n",
    "        score_0_bob_after = np.dot(classifier.coef_, score_0)\n",
    "        score_1_bob_after = np.dot(classifier.coef_, score_1)\n",
    "        score_0_alice = np.dot(weight_alice, score_0)\n",
    "        score_1_alice = np.dot(weight_alice, score_1)\n",
    "        if (score_0_bob_after - score_1_bob_after) * (score_0_alice - score_1_alice) > 0:\n",
    "            cnt_update += 1\n",
    "            \n",
    "    acc_before = cnt / len(train_dataset_bob)\n",
    "    acc_after = cnt_update / len(train_dataset_bob)\n",
    "    \n",
    "    return cosine_similarity_with_bob, cosine_similarity_with_classifier[0], acc_before, acc_after, classifier.coef_[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Improve Bob's weight\n",
    "An example of updating one of the five bob weights."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Using Ratio: 0.5\n",
      "Logistic Method:\n",
      "Updated Weight: [ 1.84123918  0.82987372  0.12163134 -0.23880032]\n",
      "Cosine Similarity: 0.9372436519316274\n",
      "Accuracy: 0.9739310512851748\n",
      "Influence Method:\n",
      "Updated Weight: [ 0.39927573  0.00136307  0.20142615 -0.20051068]\n",
      "Cosine Similarity: 0.9236632721871877\n",
      "Accuracy: 0.9713728834206359\n"
     ]
    }
   ],
   "source": [
    "weight_alice = np.array([1.04, 0.46, 0.47, -0.33]) # correctness, coherence, complexity, verbosity\n",
    "weight_bob = np.array([1.1, 1, 3.1, 3]) # correctness, coherence, complexity, verbosity\n",
    "\n",
    "# Compute suing logistic method\n",
    "updated_weight_log, cosine_similarity_log, accuracy_log = compute_cs_and_accuracy_using_logistic(weight_alice, weight_bob, val_dataset_alice, train_dataset_bob)\n",
    "\n",
    "# Compute using influence\n",
    "influence = utils.rapid_datainf(rapid_grad_train, rapid_grad_val, np.arange(len(val_dataset_alice)))\n",
    "prev_cosim, cosine_similarity_inf, prev_acc, accuracy_inf, updated_weight_inf = compute_cs_and_accuracy_using_influence(weight_alice, weight_bob, train_dataset_bob, influence, ratio=0.5)\n",
    "\n",
    "# Print the results\n",
    "print(\"Logistic Method:\")\n",
    "print(\"Updated Weight:\", updated_weight_log)\n",
    "print(\"Cosine Similarity:\", cosine_similarity_log)\n",
    "print(\"Accuracy:\", accuracy_log)\n",
    "\n",
    "print(\"Influence Method:\")\n",
    "print(\"Updated Weight:\", updated_weight_inf)\n",
    "print(\"Cosine Similarity:\", cosine_similarity_inf)\n",
    "print(\"Accuracy:\", accuracy_inf)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "hri_rlhf",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
