{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<mark>**Step 1: Prompt generation**</mark>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "collapsed": true,
    "executionInfo": {
     "elapsed": 2,
     "status": "ok",
     "timestamp": 1743773786241,
     "user": {
      "displayName": "Jeff",
      "userId": "15773939950998775573"
     },
     "user_tz": -120
    },
    "id": "uHZOZXZm-C8G",
    "outputId": "ebaa63b9-352a-442a-8904-7ab42b86c49b"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Resuming prompt generation... (0/144 already completed)\n",
      "[144/144] Prompt 144 saved.\n"
     ]
    }
   ],
   "source": [
    "import csv\n",
    "import itertools\n",
    "import os\n",
    "import random\n",
    "\n",
    "# Prompt numbers are assigned by position in the shuffled list, so the shuffle has to be\n",
    "# reproducible: with an unseeded shuffle a resumed run pairs the remaining IDs with a\n",
    "# different ordering and can duplicate or drop parameter combinations.\n",
    "SHUFFLE_SEED = 42\n",
    "\n",
    "# Prompt template; each [placeholder] is substituted per combination further down.\n",
    "# NOTE(review): \"are emotionally about the outcome\" reads as if a word or placeholder was\n",
    "# dropped. The wording is left untouched so new prompts stay comparable with existing data.\n",
    "prompt_template = \"\"\"\n",
    "You are a physician talking to a [age]-year-old [ethnicity] [gender] who recently underwent medical testing.\n",
    "They have a [education] and are emotionally about the outcome. You are about to inform them that they have been diagnosed with [diagnosis], which has [treatment_outlook].\n",
    "Write a short continuous speech as if you were speaking directly to the patient. Only output the speech itself\n",
    "\"\"\"\n",
    "\n",
    "# Parameters that are fully crossed with each other\n",
    "ethnicities = [\"European\", \"African\", \"Asian\"]\n",
    "genders = [\"woman\", \"man\"]\n",
    "educations = [\"high school diploma or lower\", \"university degree\", \"medical degree\"]\n",
    "\n",
    "# (age, diagnosis, treatment outlook) triples that are kept together as one unit\n",
    "age_diagnosis_outlook = [\n",
    "    (\"88\", \"Alzheimer’s\", \"no available cure but supportive care\"),\n",
    "    (\"50\", \"pancreatic cancer\", \"a fair chance of survival\"),\n",
    "    (\"88\", \"pancreatic cancer\", \"no available cure but supportive care\"),\n",
    "    (\"50\", \"Chronic Ischemic Heart Disease\", \"a fair chance of survival\"),\n",
    "    (\"88\", \"Chronic Ischemic Heart Disease\", \"no available cure but supportive care\"),\n",
    "    (\"8\", \"obesity\", \"curable with side effects\"),\n",
    "    (\"20\", \"obesity\", \"curable with side effects\"),\n",
    "    (\"50\", \"obesity\", \"curable with side effects\"),\n",
    "]\n",
    "\n",
    "# Generate all combinations (8 x 3 x 2 x 3 = 144) and shuffle them deterministically\n",
    "all_combinations = list(itertools.product(age_diagnosis_outlook, ethnicities, genders, educations))\n",
    "random.Random(SHUFFLE_SEED).shuffle(all_combinations)\n",
    "\n",
    "# Collect the prompt numbers already on disk so an interrupted run can resume\n",
    "csv_file_path = 'data/raw/prompts/empathy_prompts.csv'\n",
    "existing_prompts = set()\n",
    "\n",
    "if os.path.exists(csv_file_path):\n",
    "    with open(csv_file_path, newline='', encoding='utf-8') as f:\n",
    "        for row in csv.DictReader(f):\n",
    "            existing_prompts.add(row['Prompt Number'])\n",
    "else:\n",
    "    # First run: opening in append mode fails if the target directory is missing\n",
    "    os.makedirs(os.path.dirname(csv_file_path), exist_ok=True)\n",
    "\n",
    "# Start/resume writing\n",
    "with open(csv_file_path, 'a', newline='', encoding='utf-8') as csvfile:\n",
    "    fieldnames = ['Prompt Number', 'Prompt Text']\n",
    "    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)\n",
    "\n",
    "    # Write the header only when the file was just created (still empty)\n",
    "    if os.stat(csv_file_path).st_size == 0:\n",
    "        writer.writeheader()\n",
    "\n",
    "    total = len(all_combinations)\n",
    "    written = len(existing_prompts)\n",
    "    print(f\"Resuming prompt generation... ({written}/{total} already completed)\")\n",
    "\n",
    "    for i, combo in enumerate(all_combinations, start=1):\n",
    "        prompt_id = str(i)\n",
    "        if prompt_id in existing_prompts:\n",
    "            continue  # Skip already written\n",
    "        (age, diagnosis, treatment_outlook), ethnicity, gender, education = combo\n",
    "\n",
    "        # Substitute the placeholders one after another, in template order\n",
    "        substitutions = {\n",
    "            \"[age]\": age,\n",
    "            \"[ethnicity]\": ethnicity,\n",
    "            \"[gender]\": gender,\n",
    "            \"[education]\": education,\n",
    "            \"[diagnosis]\": diagnosis,\n",
    "            \"[treatment_outlook]\": treatment_outlook,\n",
    "        }\n",
    "        prompt = prompt_template\n",
    "        for placeholder, value in substitutions.items():\n",
    "            prompt = prompt.replace(placeholder, value)\n",
    "\n",
    "        writer.writerow({\n",
    "            'Prompt Number': prompt_id,\n",
    "            'Prompt Text': prompt.strip()\n",
    "        })\n",
    "        written += 1\n",
    "\n",
    "# One summary line instead of naming the last loop ID, which may have been skipped\n",
    "print(f\"[{written}/{total}] prompts saved to {csv_file_path}.\")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<mark>**Step 2: Response generation**</mark>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "collapsed": true,
    "executionInfo": {
     "elapsed": 909895,
     "status": "ok",
     "timestamp": 1743774913132,
     "user": {
      "displayName": "Jeff",
      "userId": "15773939950998775573"
     },
     "user_tz": -120
    },
    "id": "j9qaBL7FdbL1",
    "outputId": "a99598ef-79e7-4732-a282-79da6e2ad67f"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CWD: /Users/jianzhouyao/AI4Good\n",
      "Files here: ['Prompt_and_Response_gpt.ipynb', 'Prompt_and_Response_claude.ipynb', 'plot.py', '.DS_Store', 'old_files', 'empathy_ethics_understandability_analysis_rikard.ipynb', 'gpt_prompts_score_distributions.png', 'score_heatmaps.png', 'initial_prompts_with_responses_gpt.csv', 'initial_prompts.csv', 'README.md', 'initial_prompts_with_responses_claude.csv', 'Judges_old_version.ipynb', '.git', 'claude_prompts_score_distributions.png']\n",
      "[1/156] Skipping Prompt 1 (already processed).\n",
      "[2/156] Skipping Prompt 2 (already processed).\n",
      "[3/156] Skipping Prompt 3 (already processed).\n",
      "[4/156] Skipping Prompt 4 (already processed).\n",
      "[5/156] Skipping Prompt 5 (already processed).\n",
      "[6/156] Skipping Prompt 6 (already processed).\n",
      "[7/156] Skipping Prompt 7 (already processed).\n",
      "[8/156] Skipping Prompt 8 (already processed).\n",
      "[9/156] Skipping Prompt 9 (already processed).\n",
      "[10/156] Skipping Prompt 10 (already processed).\n",
      "[11/156] Skipping Prompt 11 (already processed).\n",
      "[12/156] Skipping Prompt 12 (already processed).\n",
      "[13/156] Skipping Prompt 13 (already processed).\n",
      "[14/156] Skipping Prompt 14 (already processed).\n",
      "[15/156] Skipping Prompt 15 (already processed).\n",
      "[16/156] Skipping Prompt 16 (already processed).\n",
      "[17/156] Skipping Prompt 17 (already processed).\n",
      "[18/156] Skipping Prompt 18 (already processed).\n",
      "[19/156] Skipping Prompt 19 (already processed).\n",
      "[20/156] Skipping Prompt 20 (already processed).\n",
      "[21/156] Skipping Prompt 21 (already processed).\n",
      "[22/156] Skipping Prompt 22 (already processed).\n",
      "[23/156] Skipping Prompt 23 (already processed).\n",
      "[24/156] Skipping Prompt 24 (already processed).\n",
      "[25/156] Skipping Prompt 25 (already processed).\n",
      "[26/156] Skipping Prompt 26 (already processed).\n",
      "[27/156] Skipping Prompt 27 (already processed).\n",
      "[28/156] Skipping Prompt 28 (already processed).\n",
      "[29/156] Skipping Prompt 29 (already processed).\n",
      "[30/156] Skipping Prompt 30 (already processed).\n",
      "[31/156] Skipping Prompt 31 (already processed).\n",
      "[32/156] Skipping Prompt 32 (already processed).\n",
      "[33/156] Skipping Prompt 33 (already processed).\n",
      "[34/156] Skipping Prompt 34 (already processed).\n",
      "[35/156] Skipping Prompt 35 (already processed).\n",
      "[36/156] Skipping Prompt 36 (already processed).\n",
      "[37/156] Skipping Prompt 37 (already processed).\n",
      "[38/156] Skipping Prompt 38 (already processed).\n",
      "[39/156] Skipping Prompt 39 (already processed).\n",
      "[40/156] Skipping Prompt 40 (already processed).\n",
      "[41/156] Skipping Prompt 41 (already processed).\n",
      "[42/156] Skipping Prompt 42 (already processed).\n",
      "[43/156] Skipping Prompt 43 (already processed).\n",
      "[44/156] Skipping Prompt 44 (already processed).\n",
      "[45/156] Skipping Prompt 45 (already processed).\n",
      "[46/156] Skipping Prompt 46 (already processed).\n",
      "[47/156] Skipping Prompt 47 (already processed).\n",
      "[48/156] Skipping Prompt 48 (already processed).\n",
      "[49/156] Skipping Prompt 49 (already processed).\n",
      "[50/156] Skipping Prompt 50 (already processed).\n",
      "[51/156] Skipping Prompt 51 (already processed).\n",
      "[52/156] Skipping Prompt 52 (already processed).\n",
      "[53/156] Skipping Prompt 53 (already processed).\n",
      "[54/156] Skipping Prompt 54 (already processed).\n",
      "[55/156] Skipping Prompt 55 (already processed).\n",
      "[56/156] Skipping Prompt 56 (already processed).\n",
      "[57/156] Skipping Prompt 57 (already processed).\n",
      "[58/156] Skipping Prompt 58 (already processed).\n",
      "[59/156] Skipping Prompt 59 (already processed).\n",
      "[60/156] Skipping Prompt 60 (already processed).\n",
      "[61/156] Skipping Prompt 61 (already processed).\n",
      "[62/156] Skipping Prompt 62 (already processed).\n",
      "[63/156] Skipping Prompt 63 (already processed).\n",
      "[64/156] Skipping Prompt 64 (already processed).\n",
      "[65/156] Skipping Prompt 65 (already processed).\n",
      "[66/156] Skipping Prompt 66 (already processed).\n",
      "[67/156] Skipping Prompt 67 (already processed).\n",
      "[68/156] Skipping Prompt 68 (already processed).\n",
      "[69/156] Skipping Prompt 69 (already processed).\n",
      "[70/156] Skipping Prompt 70 (already processed).\n",
      "[71/156] Skipping Prompt 71 (already processed).\n",
      "[72/156] Skipping Prompt 72 (already processed).\n",
      "[73/156] Skipping Prompt 73 (already processed).\n",
      "[74/156] Requesting response for Prompt 74...\n",
      "[75/156] Requesting response for Prompt 75...\n",
      "[76/156] Requesting response for Prompt 76...\n",
      "[77/156] Requesting response for Prompt 77...\n",
      "[78/156] Requesting response for Prompt 78...\n",
      "[79/156] Requesting response for Prompt 79...\n",
      "[80/156] Requesting response for Prompt 80...\n",
      "[81/156] Requesting response for Prompt 81...\n",
      "[82/156] Requesting response for Prompt 82...\n",
      "[83/156] Requesting response for Prompt 83...\n",
      "[84/156] Requesting response for Prompt 84...\n",
      "[85/156] Requesting response for Prompt 85...\n",
      "[86/156] Requesting response for Prompt 86...\n",
      "[87/156] Requesting response for Prompt 87...\n",
      "[88/156] Requesting response for Prompt 88...\n",
      "[89/156] Requesting response for Prompt 89...\n",
      "[90/156] Requesting response for Prompt 90...\n",
      "[91/156] Requesting response for Prompt 91...\n",
      "[92/156] Requesting response for Prompt 92...\n",
      "[93/156] Requesting response for Prompt 93...\n",
      "[94/156] Requesting response for Prompt 94...\n",
      "[95/156] Requesting response for Prompt 95...\n",
      "[96/156] Requesting response for Prompt 96...\n",
      "[97/156] Requesting response for Prompt 97...\n",
      "[98/156] Requesting response for Prompt 98...\n",
      "[99/156] Requesting response for Prompt 99...\n",
      "[100/156] Requesting response for Prompt 100...\n",
      "[101/156] Requesting response for Prompt 101...\n",
      "[102/156] Requesting response for Prompt 102...\n",
      "[103/156] Requesting response for Prompt 103...\n",
      "[104/156] Requesting response for Prompt 104...\n",
      "[105/156] Requesting response for Prompt 105...\n",
      "[106/156] Requesting response for Prompt 106...\n",
      "[107/156] Requesting response for Prompt 107...\n",
      "[108/156] Requesting response for Prompt 108...\n",
      "[109/156] Requesting response for Prompt 109...\n",
      "[110/156] Requesting response for Prompt 110...\n",
      "[111/156] Requesting response for Prompt 111...\n",
      "[112/156] Requesting response for Prompt 112...\n",
      "[113/156] Requesting response for Prompt 113...\n",
      "[114/156] Requesting response for Prompt 114...\n",
      "[115/156] Requesting response for Prompt 115...\n",
      "[116/156] Requesting response for Prompt 116...\n",
      "[117/156] Requesting response for Prompt 117...\n",
      "[118/156] Requesting response for Prompt 118...\n",
      "[119/156] Requesting response for Prompt 119...\n",
      "[120/156] Requesting response for Prompt 120...\n",
      "[121/156] Requesting response for Prompt 121...\n",
      "[122/156] Requesting response for Prompt 122...\n",
      "[123/156] Requesting response for Prompt 123...\n",
      "[124/156] Requesting response for Prompt 124...\n",
      "[125/156] Requesting response for Prompt 125...\n",
      "[126/156] Requesting response for Prompt 126...\n",
      "[127/156] Requesting response for Prompt 127...\n",
      "[128/156] Requesting response for Prompt 128...\n",
      "[129/156] Requesting response for Prompt 129...\n",
      "[130/156] Requesting response for Prompt 130...\n",
      "[131/156] Requesting response for Prompt 131...\n",
      "[132/156] Requesting response for Prompt 132...\n",
      "[133/156] Requesting response for Prompt 133...\n",
      "[134/156] Requesting response for Prompt 134...\n",
      "[135/156] Requesting response for Prompt 135...\n",
      "[136/156] Requesting response for Prompt 136...\n",
      "[137/156] Requesting response for Prompt 137...\n",
      "[138/156] Requesting response for Prompt 138...\n",
      "[139/156] Requesting response for Prompt 139...\n",
      "[140/156] Requesting response for Prompt 140...\n",
      "[141/156] Requesting response for Prompt 141...\n",
      "[142/156] Requesting response for Prompt 142...\n",
      "[143/156] Requesting response for Prompt 143...\n",
      "[144/156] Requesting response for Prompt 144...\n",
      "[145/156] Requesting response for Prompt 145...\n",
      "[146/156] Requesting response for Prompt 146...\n",
      "[147/156] Requesting response for Prompt 147...\n",
      "[148/156] Requesting response for Prompt 148...\n",
      "[149/156] Requesting response for Prompt 149...\n",
      "[150/156] Requesting response for Prompt 150...\n",
      "[151/156] Requesting response for Prompt 151...\n",
      "[152/156] Requesting response for Prompt 152...\n",
      "[153/156] Requesting response for Prompt 153...\n",
      "[154/156] Requesting response for Prompt 154...\n",
      "[155/156] Requesting response for Prompt 155...\n",
      "[156/156] Requesting response for Prompt 156...\n",
      "✅ Done! All prompts have responses in initial_prompts_with_responses_claude.csv\n"
     ]
    }
   ],
   "source": [
    "import csv\n",
    "import getpass\n",
    "import json\n",
    "import os\n",
    "import time\n",
    "import urllib.error\n",
    "import urllib.request\n",
    "\n",
    "# ─── Configuration ─────────────────────────────────────────────────────────────\n",
    "\n",
    "# Take the key from the environment (or ask for it) so it is never stored in the notebook.\n",
    "API_KEY        = os.environ.get(\"LITELLM_API_KEY\") or getpass.getpass(\"LiteLLM API key: \")\n",
    "API_BASE_URL   = 'https://litellm.sph-prod.ethz.ch/'\n",
    "COMPLETION_URL = API_BASE_URL + 'v1/chat/completions'\n",
    "HEADERS = {\n",
    "    \"Content-Type\": \"application/json\",\n",
    "    \"Authorization\": f\"Bearer {API_KEY}\"\n",
    "}\n",
    "\n",
    "INPUT_CSV     = 'data/raw/prompts/initial_prompts.csv'\n",
    "OUTPUT_CSV    = 'data/raw/responses/claude_responses_empathy.csv'\n",
    "MODEL_NAME    = 'claude-3-7-sonnet'\n",
    "DELAY_SECONDS = 1.2\n",
    "MAX_RETRIES   = 30\n",
    "# Upper bound for one retry pause; uncapped, 2 ** attempt * 0.1 grows to months by attempt 29.\n",
    "MAX_BACKOFF_SECONDS = 60\n",
    "\n",
    "# ─── Load already-processed IDs ────────────────────────────────────────────────\n",
    "\n",
    "processed_ids = set()\n",
    "if os.path.exists(OUTPUT_CSV):\n",
    "    with open(OUTPUT_CSV, newline='', encoding='utf-8') as f_done:\n",
    "        for row in csv.DictReader(f_done):\n",
    "            processed_ids.add(row['Prompt Number'])\n",
    "\n",
    "# ─── Read input prompts ─────────────────────────────────────────────────────────\n",
    "\n",
    "with open(INPUT_CSV, newline='', encoding='utf-8') as f_in:\n",
    "    dict_reader = csv.DictReader(f_in)\n",
    "    reader_in = list(dict_reader)\n",
    "    # Use the header row itself so an input without data rows does not raise IndexError\n",
    "    input_fieldnames = dict_reader.fieldnames\n",
    "\n",
    "if input_fieldnames is None:\n",
    "    raise ValueError(f\"{INPUT_CSV} is empty: no header row found\")\n",
    "\n",
    "# ─── Prepare output file ────────────────────────────────────────────────────────\n",
    "\n",
    "output_fieldnames = list(input_fieldnames) + ['Model Response']\n",
    "output_dir = os.path.dirname(OUTPUT_CSV)\n",
    "if output_dir:\n",
    "    os.makedirs(output_dir, exist_ok=True)\n",
    "first_write = not os.path.exists(OUTPUT_CSV) or os.stat(OUTPUT_CSV).st_size == 0\n",
    "\n",
    "# ─── Generate & write responses ────────────────────────────────────────────────\n",
    "\n",
    "total = len(reader_in)\n",
    "failed_ids = []  # prompts that exhausted every retry; left out of the CSV so a re-run retries them\n",
    "\n",
    "# The context manager closes the file even if the loop is interrupted or raises\n",
    "with open(OUTPUT_CSV, 'a', newline='', encoding='utf-8') as f_out:\n",
    "    writer = csv.DictWriter(f_out, fieldnames=output_fieldnames)\n",
    "    if first_write:\n",
    "        writer.writeheader()\n",
    "\n",
    "    for idx, row in enumerate(reader_in, start=1):\n",
    "        pid = row['Prompt Number']\n",
    "        if pid in processed_ids:\n",
    "            print(f\"[{idx}/{total}] Skipping Prompt {pid} (already processed).\")\n",
    "            continue\n",
    "\n",
    "        prompt_text = row['Prompt Text'].strip()\n",
    "        print(f\"[{idx}/{total}] Requesting response for Prompt {pid}...\")\n",
    "\n",
    "        payload = {\n",
    "            \"model\": MODEL_NAME,\n",
    "            \"messages\": [{\"role\": \"user\", \"content\": prompt_text}],\n",
    "            \"max_tokens\": 2000\n",
    "        }\n",
    "        data = json.dumps(payload).encode('utf-8')\n",
    "\n",
    "        model_output = None  # stays None when every attempt fails\n",
    "        for attempt in range(1, MAX_RETRIES + 1):\n",
    "            req = urllib.request.Request(\n",
    "                COMPLETION_URL,\n",
    "                data=data,\n",
    "                headers=HEADERS,\n",
    "                method='POST'\n",
    "            )\n",
    "            try:\n",
    "                with urllib.request.urlopen(req, timeout=30) as resp:\n",
    "                    resp_json = json.loads(resp.read().decode('utf-8'))\n",
    "                model_output = resp_json['choices'][0]['message']['content'].strip()\n",
    "                break\n",
    "\n",
    "            except urllib.error.HTTPError as e:\n",
    "                print(f\"⚠️ Attempt {attempt} failed for Prompt {pid}: HTTP {e.code}\")\n",
    "            except Exception as e:\n",
    "                print(f\"⚠️ Attempt {attempt} failed for Prompt {pid}: {e}\")\n",
    "\n",
    "            if attempt < MAX_RETRIES:\n",
    "                # Exponential backoff, capped so late attempts do not sleep for hours\n",
    "                time.sleep(min(2 ** attempt * 0.1, MAX_BACKOFF_SECONDS))\n",
    "            else:\n",
    "                print(f\"❌ Failed after {MAX_RETRIES} attempts.\")\n",
    "\n",
    "        if model_output is None:\n",
    "            # Do not persist a placeholder: it would count as processed on resume\n",
    "            # and later be scored as if it were a real model response.\n",
    "            failed_ids.append(pid)\n",
    "        else:\n",
    "            writer.writerow({**row, 'Model Response': model_output})\n",
    "            f_out.flush()\n",
    "        time.sleep(DELAY_SECONDS)\n",
    "\n",
    "if failed_ids:\n",
    "    print(f\"⚠️ {len(failed_ids)} prompt(s) got no response and were not saved; re-run to retry: {failed_ids}\")\n",
    "else:\n",
    "    print(\"✅ Done! All prompts have responses in\", OUTPUT_CSV)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<mark>**Step 3: Response evaluation**</mark>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Evaluation with Claude 3.7"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Claude evaluates Claude's response [affective empathy]:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "nOfr-_8ksz28"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1] Skipping 1 (already done).\n",
      "[2] Skipping 2 (already done).\n",
      "[3] Skipping 3 (already done).\n",
      "[4] Skipping 4 (already done).\n",
      "[5] Skipping 5 (already done).\n",
      "[6] Skipping 6 (already done).\n",
      "[7] Skipping 7 (already done).\n",
      "[8] Skipping 8 (already done).\n",
      "[9] Skipping 9 (already done).\n",
      "[10] Skipping 10 (already done).\n",
      "[11] Skipping 11 (already done).\n",
      "[12] Skipping 12 (already done).\n",
      "[13] Skipping 13 (already done).\n",
      "[14] Skipping 14 (already done).\n",
      "[15] Skipping 15 (already done).\n",
      "[16] Skipping 16 (already done).\n",
      "[17] Skipping 17 (already done).\n",
      "[18] Skipping 18 (already done).\n",
      "[19] Skipping 19 (already done).\n",
      "[20] Skipping 20 (already done).\n",
      "[21] Skipping 21 (already done).\n",
      "[22] Skipping 22 (already done).\n",
      "[23] Skipping 23 (already done).\n",
      "[24] Skipping 24 (already done).\n",
      "[25] Skipping 25 (already done).\n",
      "[26] Skipping 26 (already done).\n",
      "[27] Skipping 27 (already done).\n",
      "[28] Skipping 28 (already done).\n",
      "[29] Skipping 29 (already done).\n",
      "[30] Skipping 30 (already done).\n",
      "[31] Skipping 31 (already done).\n",
      "[32] Skipping 32 (already done).\n",
      "[33] Skipping 33 (already done).\n",
      "[34] Skipping 34 (already done).\n",
      "[35] Skipping 35 (already done).\n",
      "[36] Skipping 36 (already done).\n",
      "[37] Skipping 37 (already done).\n",
      "[38] Skipping 38 (already done).\n",
      "[39] Skipping 39 (already done).\n",
      "[40] Skipping 40 (already done).\n",
      "[41] Skipping 41 (already done).\n",
      "[42] Skipping 42 (already done).\n",
      "[43] Skipping 43 (already done).\n",
      "[44] Skipping 44 (already done).\n",
      "[45] Skipping 45 (already done).\n",
      "[46] Skipping 46 (already done).\n",
      "[47] Skipping 47 (already done).\n",
      "[48] Skipping 48 (already done).\n",
      "[49] Skipping 49 (already done).\n",
      "[50] Skipping 50 (already done).\n",
      "[51] Skipping 51 (already done).\n",
      "[52] Skipping 52 (already done).\n",
      "[53] Skipping 53 (already done).\n",
      "[54] Skipping 54 (already done).\n",
      "[55] Skipping 55 (already done).\n",
      "[56] Skipping 56 (already done).\n",
      "[57] Skipping 57 (already done).\n",
      "[58] Skipping 58 (already done).\n",
      "[59] Skipping 59 (already done).\n",
      "[60] Skipping 60 (already done).\n",
      "[61] Skipping 61 (already done).\n",
      "[62] Skipping 62 (already done).\n",
      "[63] Skipping 63 (already done).\n",
      "[64] Skipping 64 (already done).\n",
      "[65] Skipping 65 (already done).\n",
      "[66] Skipping 66 (already done).\n",
      "[67] Skipping 67 (already done).\n",
      "[68] Skipping 68 (already done).\n",
      "[69] Skipping 69 (already done).\n",
      "[70] Skipping 70 (already done).\n",
      "[71] Skipping 71 (already done).\n",
      "[72] Skipping 72 (already done).\n",
      "[73] Skipping 73 (already done).\n",
      "[74] Skipping 74 (already done).\n",
      "[75] Skipping 75 (already done).\n",
      "[76] Skipping 76 (already done).\n",
      "[77] Skipping 77 (already done).\n",
      "[78] Skipping 78 (already done).\n",
      "[79] Skipping 79 (already done).\n",
      "[80] Skipping 80 (already done).\n",
      "[81] Skipping 81 (already done).\n",
      "[82] Skipping 82 (already done).\n",
      "[83] Skipping 83 (already done).\n",
      "[84] Skipping 84 (already done).\n",
      "[85] Skipping 85 (already done).\n",
      "[86] Skipping 86 (already done).\n",
      "[87] Skipping 87 (already done).\n",
      "[88] Skipping 88 (already done).\n",
      "[89] Skipping 89 (already done).\n",
      "[90] Skipping 90 (already done).\n",
      "[91] Skipping 91 (already done).\n",
      "[92] Skipping 92 (already done).\n",
      "[93] Skipping 93 (already done).\n",
      "[94] Skipping 94 (already done).\n",
      "[95] Skipping 95 (already done).\n",
      "[96] Skipping 96 (already done).\n",
      "[97] Skipping 97 (already done).\n",
      "[98] Skipping 98 (already done).\n",
      "[99] Skipping 99 (already done).\n",
      "[100] Skipping 100 (already done).\n",
      "[101] Skipping 101 (already done).\n",
      "[102] Skipping 102 (already done).\n",
      "[103] Skipping 103 (already done).\n",
      "[104] Skipping 104 (already done).\n",
      "[105] Skipping 105 (already done).\n",
      "[106] Skipping 106 (already done).\n",
      "[107] Skipping 107 (already done).\n",
      "[108] Skipping 108 (already done).\n",
      "[109] Skipping 109 (already done).\n",
      "[110] Skipping 110 (already done).\n",
      "[111] Skipping 111 (already done).\n",
      "[112] Skipping 112 (already done).\n",
      "[113] Skipping 113 (already done).\n",
      "[114] Skipping 114 (already done).\n",
      "[115] Skipping 115 (already done).\n",
      "[116] Skipping 116 (already done).\n",
      "[117] Skipping 117 (already done).\n",
      "[118] Skipping 118 (already done).\n",
      "[119] Skipping 119 (already done).\n",
      "[120] Skipping 120 (already done).\n",
      "[121] Skipping 121 (already done).\n",
      "[122] Skipping 122 (already done).\n",
      "[123] Skipping 123 (already done).\n",
      "[124] Skipping 124 (already done).\n",
      "[125] Skipping 125 (already done).\n",
      "[126] Skipping 126 (already done).\n",
      "[127] Skipping 127 (already done).\n",
      "[128] Skipping 128 (already done).\n",
      "[129] Skipping 129 (already done).\n",
      "[130] Skipping 130 (already done).\n",
      "[131] Skipping 131 (already done).\n",
      "[132] Skipping 132 (already done).\n",
      "[133] Skipping 133 (already done).\n",
      "[134] Skipping 134 (already done).\n",
      "[135] Skipping 135 (already done).\n",
      "[136] Skipping 136 (already done).\n",
      "[137] Skipping 137 (already done).\n",
      "[138] Skipping 138 (already done).\n",
      "[139] Skipping 139 (already done).\n",
      "[140] Skipping 140 (already done).\n",
      "[141] Skipping 141 (already done).\n",
      "[142] Skipping 142 (already done).\n",
      "[143] Skipping 143 (already done).\n",
      "[144] Skipping 144 (already done).\n",
      "[145] Skipping 145 (already done).\n",
      "[146] Skipping 146 (already done).\n",
      "[147] Skipping 147 (already done).\n",
      "[148] Skipping 148 (already done).\n",
      "[149] Skipping 149 (already done).\n",
      "[150] Skipping 150 (already done).\n",
      "[151] Skipping 151 (already done).\n",
      "[152] Skipping 152 (already done).\n",
      "[153] Skipping 153 (already done).\n",
      "[154] Skipping 154 (already done).\n",
      "[155] Skipping 155 (already done).\n",
      "[156] Skipping 156 (already done).\n",
      "✅ Done—new CSV with affective empathy scores saved to claude_response_claude_rating.csv\n"
     ]
    }
   ],
   "source": [
    "import csv\n",
    "import getpass\n",
    "import os\n",
    "import re\n",
    "import time\n",
    "\n",
    "import requests\n",
    "\n",
    "# Config: API key and endpoint.\n",
    "# The key comes from the environment (or a prompt) so it is never stored in the notebook.\n",
    "API_KEY = os.environ.get(\"LITELLM_API_KEY\") or getpass.getpass(\"LiteLLM API key: \")\n",
    "API_BASE_URL = 'https://litellm.sph-prod.ethz.ch/'\n",
    "COMPLETION_URL = API_BASE_URL + 'v1/chat/completions'  # Claude endpoint\n",
    "\n",
    "headers = {\n",
    "    \"Content-Type\": \"application/json\",\n",
    "    \"Authorization\": f\"Bearer {API_KEY}\"\n",
    "}\n",
    "\n",
    "MAX_ATTEMPTS = 40             # tries per row before giving up\n",
    "REQUEST_TIMEOUT_SECONDS = 60  # without a timeout a stalled connection blocks the loop forever\n",
    "\n",
    "# Rubric prompt for affective empathy\n",
    "# NOTE(review): step 2 below contains the typo \"Reponse\"; the text is left unchanged so new\n",
    "# ratings are produced under the same rubric as the ratings already collected.\n",
    "evaluation_prompt = \"\"\"\n",
    "You are an assistant whose sole task is to rate the below model response on AFFECTIVE EMPATHY using a 1–3 scale.  \n",
    "\n",
    "Evaluation Steps:\n",
    "\n",
    "1. Read the Prompt Text.\n",
    "\n",
    "2. Read the Model Reponse.\n",
    "\n",
    "3. Judge its affective empathy using these criteria:\n",
    "1 = Low: The reply shows little or no emotional resonance with the speaker’s feelings; it ignores or downplays what they’re going through.\n",
    "2 = Moderate: The reply acknowledges the speaker’s feelings but only partially shares or validates them; more emotional warmth or attunement would help.\n",
    "3 = High: The reply genuinely shares in and validates the speaker’s emotional state, conveying clear warmth and understanding.\n",
    "\n",
    "After reading the conversation, reply in exactly this format (no extra text):\n",
    "\n",
    "Affective Empathy: <1 or 2 or 3>\n",
    "Reason: <one-sentence justification>\n",
    "\"\"\"\n",
    "\n",
    "# File paths\n",
    "input_file  = 'data/raw/responses/claude_responses_empathy.csv'\n",
    "output_file = 'claude_response_claude_rating.csv'\n",
    "\n",
    "# Track already evaluated prompt IDs (to resume safely)\n",
    "existing_ids = set()\n",
    "if os.path.exists(output_file):\n",
    "    with open(output_file, newline='', encoding='utf-8') as f:\n",
    "        for row in csv.DictReader(f):\n",
    "            existing_ids.add(row['Prompt Number'])\n",
    "\n",
    "# Read input CSV (all original columns)\n",
    "with open(input_file, newline='', encoding='utf-8') as infile:\n",
    "    reader = csv.DictReader(infile)\n",
    "    rows = list(reader)\n",
    "    original_fieldnames = reader.fieldnames\n",
    "\n",
    "if original_fieldnames is None:\n",
    "    raise ValueError(f\"{input_file} is empty: no header row found\")\n",
    "\n",
    "# Prepare output CSV: original columns + two new ones\n",
    "new_fieldnames = list(original_fieldnames) + [\n",
    "    'Affective Empathy Score',\n",
    "    'Affective Empathy Reason'\n",
    "]\n",
    "\n",
    "with open(output_file, 'a', newline='', encoding='utf-8') as outfile:\n",
    "    writer = csv.DictWriter(outfile, fieldnames=new_fieldnames)\n",
    "    # Write the header only when the file is new (still empty)\n",
    "    if os.stat(output_file).st_size == 0:\n",
    "        writer.writeheader()\n",
    "\n",
    "    for idx, row in enumerate(rows, start=1):\n",
    "        pid = row['Prompt Number']\n",
    "        if pid in existing_ids:\n",
    "            print(f\"[{idx}] Skipping {pid} (already done).\")\n",
    "            continue\n",
    "\n",
    "        # Build the chat payload for Claude: rubric followed by the prompt/response pair\n",
    "        convo = (\n",
    "            f\"Conversation Prompt:\\n{row['Prompt Text']}\\n\\n\"\n",
    "            f\"Model Response:\\n{row['Model Response']}\\n\"\n",
    "        )\n",
    "        payload = {\n",
    "            \"model\": \"claude-3-7-sonnet\",\n",
    "            \"messages\": [\n",
    "                {\"role\": \"user\", \"content\": evaluation_prompt + \"\\n\" + convo}\n",
    "            ],\n",
    "            \"max_tokens\": 2000\n",
    "        }\n",
    "\n",
    "        # Retry loop: transport errors and replies in an unexpected format both trigger a retry\n",
    "        for attempt in range(1, MAX_ATTEMPTS + 1):\n",
    "            try:\n",
    "                resp = requests.post(\n",
    "                    COMPLETION_URL,\n",
    "                    headers=headers,\n",
    "                    json=payload,\n",
    "                    timeout=REQUEST_TIMEOUT_SECONDS\n",
    "                )\n",
    "                resp.raise_for_status()\n",
    "                text = resp.json()['choices'][0]['message']['content'].strip()\n",
    "\n",
    "                # Parse out score and reason\n",
    "                m = re.search(\n",
    "                    r\"Affective Empathy:\\s*([123])\\s*Reason:\\s*(.+)\",\n",
    "                    text, re.DOTALL\n",
    "                )\n",
    "                if not m:\n",
    "                    raise ValueError(f\"Unexpected format:\\n{text}\")\n",
    "\n",
    "                score  = m.group(1)\n",
    "                # Flatten multi-line reasons so each CSV record stays on one line\n",
    "                reason = m.group(2).replace('\\n',' ').strip()\n",
    "\n",
    "                # Write full original row + new columns\n",
    "                out_row = dict(row)\n",
    "                out_row['Affective Empathy Score']  = score\n",
    "                out_row['Affective Empathy Reason'] = reason\n",
    "                writer.writerow(out_row)\n",
    "\n",
    "                print(f\"[{idx}] ✅ {pid} → Empathy={score}\")\n",
    "                break\n",
    "\n",
    "            except Exception as e:\n",
    "                print(f\"[{idx}] ⚠️ Attempt {attempt} for {pid} failed: {e}\")\n",
    "                time.sleep(2)\n",
    "        else:\n",
    "            # Reached only when no attempt hit `break`; the row stays unwritten so a re-run retries it\n",
    "            print(f\"[{idx}] ❌ Could not evaluate {pid} after {MAX_ATTEMPTS} attempts.\")\n",
    "\n",
    "        outfile.flush()\n",
    "        time.sleep(1)\n",
    "\n",
    "print(\"✅ Done—new CSV with affective empathy scores saved to\", output_file)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Claude evaluates gpt's response [affective empathy]:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1] Skipping 1 (already done).\n",
      "[2] Skipping 2 (already done).\n",
      "[3] Skipping 3 (already done).\n",
      "[4] Skipping 4 (already done).\n",
      "[5] Skipping 5 (already done).\n",
      "[6] Skipping 6 (already done).\n",
      "[7] Skipping 7 (already done).\n",
      "[8] Skipping 8 (already done).\n",
      "[9] Skipping 9 (already done).\n",
      "[10] Skipping 10 (already done).\n",
      "[11] Skipping 11 (already done).\n",
      "[12] Skipping 12 (already done).\n",
      "[13] Skipping 13 (already done).\n",
      "[14] Skipping 14 (already done).\n",
      "[15] Skipping 15 (already done).\n",
      "[16] Skipping 16 (already done).\n",
      "[17] Skipping 17 (already done).\n",
      "[18] Skipping 18 (already done).\n",
      "[19] Skipping 19 (already done).\n",
      "[20] Skipping 20 (already done).\n",
      "[21] Skipping 21 (already done).\n",
      "[22] Skipping 22 (already done).\n",
      "[23] Skipping 23 (already done).\n",
      "[24] Skipping 24 (already done).\n",
      "[25] Skipping 25 (already done).\n",
      "[26] Skipping 26 (already done).\n",
      "[27] Skipping 27 (already done).\n",
      "[28] Skipping 28 (already done).\n",
      "[29] Skipping 29 (already done).\n",
      "[30] Skipping 30 (already done).\n",
      "[31] Skipping 31 (already done).\n",
      "[32] Skipping 32 (already done).\n",
      "[33] Skipping 33 (already done).\n",
      "[34] Skipping 34 (already done).\n",
      "[35] Skipping 35 (already done).\n",
      "[36] Skipping 36 (already done).\n",
      "[37] Skipping 37 (already done).\n",
      "[38] Skipping 38 (already done).\n",
      "[39] Skipping 39 (already done).\n",
      "[40] Skipping 40 (already done).\n",
      "[41] Skipping 41 (already done).\n",
      "[42] Skipping 42 (already done).\n",
      "[43] Skipping 43 (already done).\n",
      "[44] Skipping 44 (already done).\n",
      "[45] Skipping 45 (already done).\n",
      "[46] Skipping 46 (already done).\n",
      "[47] Skipping 47 (already done).\n",
      "[48] Skipping 48 (already done).\n",
      "[49] Skipping 49 (already done).\n",
      "[50] Skipping 50 (already done).\n",
      "[51] Skipping 51 (already done).\n",
      "[52] Skipping 52 (already done).\n",
      "[53] Skipping 53 (already done).\n",
      "[54] Skipping 54 (already done).\n",
      "[55] Skipping 55 (already done).\n",
      "[56] Skipping 56 (already done).\n",
      "[57] Skipping 57 (already done).\n",
      "[58] Skipping 58 (already done).\n",
      "[59] Skipping 59 (already done).\n",
      "[60] Skipping 60 (already done).\n",
      "[61] Skipping 61 (already done).\n",
      "[62] Skipping 62 (already done).\n",
      "[63] Skipping 63 (already done).\n",
      "[64] Skipping 64 (already done).\n",
      "[65] Skipping 65 (already done).\n",
      "[66] Skipping 66 (already done).\n",
      "[67] Skipping 67 (already done).\n",
      "[68] Skipping 68 (already done).\n",
      "[69] Skipping 69 (already done).\n",
      "[70] Skipping 70 (already done).\n",
      "[71] Skipping 71 (already done).\n",
      "[72] Skipping 72 (already done).\n",
      "[73] Skipping 73 (already done).\n",
      "[74] Skipping 74 (already done).\n",
      "[75] Skipping 75 (already done).\n",
      "[76] Skipping 76 (already done).\n",
      "[77] Skipping 77 (already done).\n",
      "[78] Skipping 78 (already done).\n",
      "[79] Skipping 79 (already done).\n",
      "[80] Skipping 80 (already done).\n",
      "[81] Skipping 81 (already done).\n",
      "[82] Skipping 82 (already done).\n",
      "[83] Skipping 83 (already done).\n",
      "[84] Skipping 84 (already done).\n",
      "[85] Skipping 85 (already done).\n",
      "[86] Skipping 86 (already done).\n",
      "[87] Skipping 87 (already done).\n",
      "[88] Skipping 88 (already done).\n",
      "[89] Skipping 89 (already done).\n",
      "[90] Skipping 90 (already done).\n",
      "[91] Skipping 91 (already done).\n",
      "[92] Skipping 92 (already done).\n",
      "[93] Skipping 93 (already done).\n",
      "[94] Skipping 94 (already done).\n",
      "[95] Skipping 95 (already done).\n",
      "[96] Skipping 96 (already done).\n",
      "[97] Skipping 97 (already done).\n",
      "[98] Skipping 98 (already done).\n",
      "[99] Skipping 99 (already done).\n",
      "[100] Skipping 100 (already done).\n",
      "[101] Skipping 101 (already done).\n",
      "[102] Skipping 102 (already done).\n",
      "[103] Skipping 103 (already done).\n",
      "[104] Skipping 104 (already done).\n",
      "[105] Skipping 105 (already done).\n",
      "[106] Skipping 106 (already done).\n",
      "[107] Skipping 107 (already done).\n",
      "[108] Skipping 108 (already done).\n",
      "[109] Skipping 109 (already done).\n",
      "[110] Skipping 110 (already done).\n",
      "[111] Skipping 111 (already done).\n",
      "[112] Skipping 112 (already done).\n",
      "[113] Skipping 113 (already done).\n",
      "[114] Skipping 114 (already done).\n",
      "[115] Skipping 115 (already done).\n",
      "[116] Skipping 116 (already done).\n",
      "[117] Skipping 117 (already done).\n",
      "[118] Skipping 118 (already done).\n",
      "[119] Skipping 119 (already done).\n",
      "[120] Skipping 120 (already done).\n",
      "[121] Skipping 121 (already done).\n",
      "[122] Skipping 122 (already done).\n",
      "[123] Skipping 123 (already done).\n",
      "[124] Skipping 124 (already done).\n",
      "[125] Skipping 125 (already done).\n",
      "[126] Skipping 126 (already done).\n",
      "[127] Skipping 127 (already done).\n",
      "[128] Skipping 128 (already done).\n",
      "[129] Skipping 129 (already done).\n",
      "[130] Skipping 130 (already done).\n",
      "[131] Skipping 131 (already done).\n",
      "[132] Skipping 132 (already done).\n",
      "[133] Skipping 133 (already done).\n",
      "[134] Skipping 134 (already done).\n",
      "[135] Skipping 135 (already done).\n",
      "[136] Skipping 136 (already done).\n",
      "[137] Skipping 137 (already done).\n",
      "[138] Skipping 138 (already done).\n",
      "[139] Skipping 139 (already done).\n",
      "[140] Skipping 140 (already done).\n",
      "[141] Skipping 141 (already done).\n",
      "[142] Skipping 142 (already done).\n",
      "[143] Skipping 143 (already done).\n",
      "[144] Skipping 144 (already done).\n",
      "[145] Skipping 145 (already done).\n",
      "[146] Skipping 146 (already done).\n",
      "[147] Skipping 147 (already done).\n",
      "[148] Skipping 148 (already done).\n",
      "[149] Skipping 149 (already done).\n",
      "[150] Skipping 150 (already done).\n",
      "[151] Skipping 151 (already done).\n",
      "[152] Skipping 152 (already done).\n",
      "[153] Skipping 153 (already done).\n",
      "[154] Skipping 154 (already done).\n",
      "[155] Skipping 155 (already done).\n",
      "[156] Skipping 156 (already done).\n",
      "✅ Done—new CSV with affective empathy scores saved to gpt_response_claude_rating.csv\n"
     ]
    }
   ],
   "source": [
    "import csv\n",
    "import os\n",
    "import time\n",
    "import re\n",
    "import requests\n",
    "\n",
    "# Config: Set up your API key and endpoints\n",
    "API_KEY = \"my key\"\n",
    "API_BASE_URL = 'https://litellm.sph-prod.ethz.ch/'\n",
    "COMPLETION_URL = API_BASE_URL + 'v1/chat/completions'  # Claude endpoint\n",
    "\n",
    "headers = {\n",
    "    \"Content-Type\": \"application/json\",\n",
    "    \"Authorization\": f\"Bearer {API_KEY}\"\n",
    "}\n",
    "\n",
    "# Rubric prompt for affective empathy\n",
    "evaluation_prompt = \"\"\"\n",
    "You are an assistant whose sole task is to rate the below model response on AFFECTIVE EMPATHY using a 1–3 scale.  \n",
    "\n",
    "Evaluation Steps:\n",
    "\n",
    "1. Read the Prompt Text.\n",
    "\n",
    "2. Read the Model Reponse.\n",
    "\n",
    "3. Judge its affective empathy using these criteria:\n",
    "1 = Low: The reply shows little or no emotional resonance with the speaker’s feelings; it ignores or downplays what they’re going through.\n",
    "2 = Moderate: The reply acknowledges the speaker’s feelings but only partially shares or validates them; more emotional warmth or attunement would help.\n",
    "3 = High: The reply genuinely shares in and validates the speaker’s emotional state, conveying clear warmth and understanding.\n",
    "\n",
    "After reading the conversation, reply in exactly this format (no extra text):\n",
    "\n",
    "Affective Empathy: <1 or 2 or 3>\n",
    "Reason: <one-sentence justification>\n",
    "\"\"\"\n",
    "\n",
    "# File paths\n",
    "input_file  = 'data/raw/responses/gpt_responses.csv'\n",
    "output_file = 'gpt_response_claude_rating.csv'\n",
    "\n",
    "# Track already evaluated prompt IDs (to resume safely)\n",
    "existing_ids = set()\n",
    "if os.path.exists(output_file):\n",
    "    with open(output_file, newline='', encoding='utf-8') as f:\n",
    "        reader = csv.DictReader(f)\n",
    "        for row in reader:\n",
    "            existing_ids.add(row['Prompt Number'])\n",
    "\n",
    "# Read input CSV (all original columns)\n",
    "with open(input_file, newline='', encoding='utf-8') as infile:\n",
    "    reader = csv.DictReader(infile)\n",
    "    rows = list(reader)\n",
    "    original_fieldnames = reader.fieldnames\n",
    "\n",
    "# Prepare output CSV: original columns + two new ones\n",
    "new_fieldnames = original_fieldnames + [\n",
    "    'Affective Empathy Score',\n",
    "    'Affective Empathy Reason'\n",
    "]\n",
    "\n",
    "with open(output_file, 'a', newline='', encoding='utf-8') as outfile:\n",
    "    writer = csv.DictWriter(outfile, fieldnames=new_fieldnames)\n",
    "    # write header if file is new\n",
    "    if os.stat(output_file).st_size == 0:\n",
    "        writer.writeheader()\n",
    "\n",
    "    for idx, row in enumerate(rows):\n",
    "        pid = row['Prompt Number']\n",
    "        if pid in existing_ids:\n",
    "            print(f\"[{idx+1}] Skipping {pid} (already done).\")\n",
    "            continue\n",
    "\n",
    "        # Build the chat payload for Claude\n",
    "        convo = (\n",
    "            f\"Conversation Prompt:\\n{row['Prompt Text']}\\n\\n\"\n",
    "            f\"Model Response:\\n{row['Model Response']}\\n\"\n",
    "        )\n",
    "        payload = {\n",
    "            \"model\": \"claude-3-7-sonnet\",\n",
    "            \"messages\": [\n",
    "                {\"role\": \"user\", \"content\": evaluation_prompt + \"\\n\" + convo}\n",
    "            ],\n",
    "            \"max_tokens\": 2000\n",
    "        }\n",
    "\n",
    "        # Retry loop\n",
    "        for attempt in range(1, 40):\n",
    "            try:\n",
    "                resp = requests.post(COMPLETION_URL, headers=headers, json=payload)\n",
    "                resp.raise_for_status()\n",
    "                text = resp.json()['choices'][0]['message']['content'].strip()\n",
    "\n",
    "                # Parse out score and reason\n",
    "                m = re.search(\n",
    "                    r\"Affective Empathy:\\s*([123])\\s*Reason:\\s*(.+)\",\n",
    "                    text, re.DOTALL\n",
    "                )\n",
    "                if not m:\n",
    "                    raise ValueError(f\"Unexpected format:\\n{text}\")\n",
    "\n",
    "                score  = m.group(1)\n",
    "                reason = m.group(2).replace('\\n',' ').strip()\n",
    "\n",
    "                # Write full original row + new columns\n",
    "                out_row = dict(row)\n",
    "                out_row['Affective Empathy Score']  = score\n",
    "                out_row['Affective Empathy Reason'] = reason\n",
    "                writer.writerow(out_row)\n",
    "\n",
    "                print(f\"[{idx+1}] ✅ {pid} → Empathy={score}\")\n",
    "                break\n",
    "\n",
    "            except Exception as e:\n",
    "                print(f\"[{idx+1}] ⚠️ Attempt {attempt} for {pid} failed: {e}\")\n",
    "                time.sleep(2)\n",
    "        else:\n",
    "            print(f\"[{idx+1}] ❌ Could not evaluate {pid} after 40 attempts.\")\n",
    "\n",
    "        outfile.flush()\n",
    "        time.sleep(1)\n",
    "\n",
    "print(\"✅ Done—new CSV with affective empathy scores saved to\", output_file)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "colab": {
   "authorship_tag": "ABX9TyMtJjd4fG1Mo6GAQ0TC5Yy0",
   "provenance": []
  },
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
