{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 216
    },
    "executionInfo": {
     "elapsed": 33,
     "status": "error",
     "timestamp": 1743195041674,
     "user": {
      "displayName": "Jeff",
      "userId": "15773939950998775573"
     },
     "user_tz": -60
    },
    "id": "ZJbMDmYxdJzX",
    "outputId": "c8fc9e55-2e2a-4016-c582-47d90b775e8d"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "MODEL SAYS: Hello! How can I assist you today?\n",
      "scoring claude\n",
      "scoring gpt\n"
     ]
    }
   ],
   "source": [
    "# Install necessary libraries (run this only once)\n",
    "# pip install judges instructor openai\n",
    "\n",
    "\"\"\"\n",
    "This script produces the scores for the initial prompts and responses from Claude and GPT-4o using the Emotion Queen judges.\n",
    "The monkey patch is used to force judges to go through the ETH proxy.\n",
    "The voting methods.py file was also modified to handle NaN values, strings, ... in the scores.\n",
    "If you want to run this code, please insert your key and change voting methods.py in Libs/site-packages/judges/\n",
    "\"\"\"\n",
    "\n",
    "\n",
    "import os\n",
    "\n",
    "# The proxy credentials are written once, into the environment, and every\n",
    "# later step in this section reads them back from there.\n",
    "# 1) your key\n",
    "os.environ[\"OPENAI_API_KEY\"] = \"My key\"\n",
    "# 2) proxy's v1 root\n",
    "os.environ[\"OPENAI_API_BASE\"] = \"https://litellm.sph-prod.ethz.ch/v1\"\n",
    "\n",
    "import openai\n",
    "# Mirror the env-vars onto the openai module as well; optional, but safe to do again in code.\n",
    "# NOTE(review): openai>=1.0 appears to read `openai.base_url` rather than `api_base` - confirm this line still has an effect.\n",
    "openai.api_key = os.environ[\"OPENAI_API_KEY\"]\n",
    "openai.api_base = os.environ[\"OPENAI_API_BASE\"]\n",
    "\n",
    "from openai import OpenAI\n",
    "\n",
    "# Build the client from the env-vars above instead of repeating the literals.\n",
    "# With two copies of the key, editing only the env-var assignment left this\n",
    "# client (the one handed to judges further down) on the placeholder value.\n",
    "client = OpenAI(\n",
    "    api_key=os.environ[\"OPENAI_API_KEY\"],\n",
    "    base_url=os.environ[\"OPENAI_API_BASE\"],  # your LiteLLM proxy root\n",
    ")\n",
    "\n",
    "# Monkey-patch judges so that every client lookup returns the proxy-backed `client`.\n",
    "import judges._client as _jc\n",
    "\n",
    "\n",
    "def _use_proxy_client(*args, **kwargs):\n",
    "    \"\"\"Ignore any arguments and return the shared `client` object.\"\"\"\n",
    "    return client\n",
    "\n",
    "\n",
    "_jc.llm_client = _use_proxy_client\n",
    "\n",
    "# Smoke test: send one minimal chat request through the client and print the reply.\n",
    "smoke_messages = [{\"role\": \"user\", \"content\": \"Say hello\"}]\n",
    "resp = client.chat.completions.create(model=\"gpt-4o-mini\", messages=smoke_messages)\n",
    "first_choice = resp.choices[0]\n",
    "print(\"MODEL SAYS:\", first_choice.message.content)\n",
    "\n",
    "import pandas as pd\n",
    "from judges.graders.empathy import EmotionQueenImplicitEmotionRecognition,EmotionQueenIntentionRecognition,EmotionQueenKeyEventRecognition,EmotionQueenMixedEventRecognition\n",
    "from judges import Jury\n",
    "\n",
    "# Read the Claude and GPT response tables (UTF-8 CSV files).\n",
    "claude_csv = \"data/raw/responses/claude_responses_empathy.csv\"\n",
    "gpt_csv = \"data/raw/responses/gpt_responses.csv\"\n",
    "df_claude = pd.read_csv(claude_csv, encoding=\"utf-8\")\n",
    "df_gpt = pd.read_csv(gpt_csv, encoding=\"utf-8\")\n",
    "\n",
    "# Build one jury per EmotionQueen metric; each jury holds 5 separate gpt-4o judges.\n",
    "JUDGE_MODEL = \"gpt-4o\"\n",
    "JUDGES_PER_JURY = 5\n",
    "\n",
    "agents_implicit = [\n",
    "    EmotionQueenImplicitEmotionRecognition(model=JUDGE_MODEL)\n",
    "    for _ in range(JUDGES_PER_JURY)\n",
    "]\n",
    "agents_intention = [\n",
    "    EmotionQueenIntentionRecognition(model=JUDGE_MODEL)\n",
    "    for _ in range(JUDGES_PER_JURY)\n",
    "]\n",
    "agents_keyEvent = [\n",
    "    EmotionQueenKeyEventRecognition(model=JUDGE_MODEL)\n",
    "    for _ in range(JUDGES_PER_JURY)\n",
    "]\n",
    "agents_mixedEvent = [\n",
    "    EmotionQueenMixedEventRecognition(model=JUDGE_MODEL)\n",
    "    for _ in range(JUDGES_PER_JURY)\n",
    "]\n",
    "\n",
    "# Each jury combines its judges' scores with the \"average\" voting method.\n",
    "jury_implicit = Jury(judges=agents_implicit, voting_method=\"average\")\n",
    "jury_intention = Jury(judges=agents_intention, voting_method=\"average\")\n",
    "jury_keyEvent = Jury(judges=agents_keyEvent, voting_method=\"average\")\n",
    "jury_mixedEvent = Jury(judges=agents_mixedEvent, voting_method=\"average\")\n",
    "\n",
    "# Turn one row into a numeric verdict from the given jury\n",
    "def score(row, jury):\n",
    "    \"\"\"Return the jury's score for a single prompt/response row as a float.\n",
    "\n",
    "    `row` must provide the \"Prompt Text\" and \"Model Response\" entries; they are\n",
    "    passed to `jury.vote` as `input` and `output`, with `expected=None`.\n",
    "    \"\"\"\n",
    "    prompt_text, model_response = row[\"Prompt Text\"], row[\"Model Response\"]\n",
    "    return float(jury.vote(input=prompt_text, output=model_response, expected=None).score)\n",
    "\n",
    "# Score each response table with every jury (one new column per metric), then save it.\n",
    "metric_juries = [\n",
    "    (\"Implicit Emotion Recognition\", jury_implicit),\n",
    "    (\"Intention Recognition\", jury_intention),\n",
    "    (\"Key Event Recognition\", jury_keyEvent),\n",
    "    (\"Mixed Event Recognition\", jury_mixedEvent),\n",
    "]\n",
    "scoring_runs = [\n",
    "    (\"scoring claude\", df_claude, \"initial_prompts_with_responses_claude_scored_by_Queen.csv\"),\n",
    "    (\"scoring gpt\", df_gpt, \"initial_prompts_with_responses_gpt_scored_by_Queen.csv\"),\n",
    "]\n",
    "\n",
    "for banner, responses, out_csv in scoring_runs:\n",
    "    print(banner)\n",
    "    # Columns are added in place, in the same order as the metric list above.\n",
    "    for column, metric_jury in metric_juries:\n",
    "        responses[column] = responses.apply(score, axis=1, args=(metric_jury,))\n",
    "    responses.to_csv(out_csv, encoding=\"utf-8\")\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "        \n",
    "        \n",
    "    \n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "j9qaBL7FdbL1"
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "colab": {
   "authorship_tag": "ABX9TyOjA8IOZAiNGe8Wy2eBwwnN",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "ai4good",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
