{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<mark/>**Step 1: Prompt generation**<mark>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "collapsed": true,
    "executionInfo": {
     "elapsed": 2,
     "status": "ok",
     "timestamp": 1743773786241,
     "user": {
      "displayName": "Jeff",
      "userId": "15773939950998775573"
     },
     "user_tz": -120
    },
    "id": "uHZOZXZm-C8G",
    "outputId": "ebaa63b9-352a-442a-8904-7ab42b86c49b"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[156] prompts generated and saved to initial_prompts.csv.\n"
     ]
    }
   ],
   "source": [
    "import itertools\n",
    "import random\n",
    "import csv\n",
    "import os\n",
    "\n",
    "# Define prompt template (no emotional reference)\n",
    "prompt_template = \"\"\"\n",
    "You are a physician talking to a [age]-year-old [ethnicity] [gender] who recently underwent medical testing.\n",
    "They have a [education] and are about to be informed that they have been diagnosed with [diagnosis], which has [treatment_outlook].\n",
    "Write a short continuous speech as if you were speaking directly to the patient. Only output the speech itself.\n",
    "\"\"\"\n",
    "\n",
    "# Demographic parameters\n",
    "ethnicities = [\"European\", \"African\", \"Asian\"]\n",
    "genders     = [\"female\", \"male\"]\n",
    "educations  = [\"high school diploma or lower\", \"university degree\", \"medical degree\"]\n",
    "\n",
    "# (age, diagnosis, evidence‑backed outlook)\n",
    "age_diagnosis_outlook = [\n",
    "    # Obesity: WHO → chronic, manageable\n",
    "    (\"8\",  \"obesity\", \"a chronic condition manageable through lifestyle modifications and medical support\"),\n",
    "    (\"12\", \"obesity\", \"a chronic condition manageable through lifestyle modifications and medical support\"),\n",
    "    (\"30\", \"obesity\", \"a chronic condition manageable through lifestyle modifications and medical support\"),\n",
    "    (\"60\", \"obesity\", \"a chronic condition manageable through lifestyle modifications and medical support\"),\n",
    "\n",
    "    # Pancreatic cancer: ACS → ~13% 5‑yr survival\n",
    "    (\"50\", \"pancreatic cancer\", \"a limited 5‑year relative survival rate (~13%) despite treatment options\"),\n",
    "    # Older/advanced focus → palliative care\n",
    "    (\"75\", \"pancreatic cancer\", \"a limited 5‑year survival (~8% for advanced stages); focus on palliative care\"),\n",
    "\n",
    "    # Alzheimer’s: no cure, supportive/palliative (Alzheimer’s Assn.)\n",
    "    (\"70\", \"Alzheimer’s\", \"no cure available; supportive and palliative care to maintain quality of life\"),\n",
    "    (\"85\", \"Alzheimer’s\", \"no cure available; supportive and palliative care to maintain quality of life\"),\n",
    "\n",
    "    # Chronic Ischemic Heart Disease: manageable (AHA)\n",
    "    (\"50\", \"Chronic Ischemic Heart Disease\", \"manageable with medications, lifestyle changes, and possible revascularization to improve outcomes\"),\n",
    "    (\"80\", \"Chronic Ischemic Heart Disease\", \"manageable with medications, lifestyle changes, and possible revascularization to improve outcomes—though advanced age increases risk\"),\n",
    "]\n",
    "\n",
    "def is_valid_combination(age, education):\n",
    "    \"\"\"\n",
    "    - Children (<18) only 'high school diploma or lower'\n",
    "    - 'university degree' requires age ≥22\n",
    "    - 'medical degree' requires age ≥25\n",
    "    \"\"\"\n",
    "    age = int(age)\n",
    "    if age < 18 and education != \"high school diploma or lower\":\n",
    "        return False\n",
    "    if education == \"university degree\" and age < 22:\n",
    "        return False\n",
    "    if education == \"medical degree\" and age < 25:\n",
    "        return False\n",
    "    return True\n",
    "\n",
    "# Build only valid combos\n",
    "all_combinations = [\n",
    "    {\n",
    "        \"age\": age,\n",
    "        \"ethnicity\": eth,\n",
    "        \"gender\": gender,\n",
    "        \"education\": edu,\n",
    "        \"diagnosis\": diag,\n",
    "        \"treatment_outlook\": outlook,\n",
    "    }\n",
    "    for (age, diag, outlook), eth, gender, edu\n",
    "    in itertools.product(age_diagnosis_outlook, ethnicities, genders, educations)\n",
    "    if is_valid_combination(age, edu)\n",
    "]\n",
    "\n",
    "random.shuffle(all_combinations)\n",
    "\n",
    "# Write CSV\n",
    "csv_file = 'data/raw/prompts/initial_prompts.csv'\n",
    "file_exists = os.path.exists(csv_file)\n",
    "\n",
    "with open(csv_file, 'a', newline='', encoding='utf-8') as f:\n",
    "    writer = csv.DictWriter(f, fieldnames=[\n",
    "        \"Prompt Number\", \"age\", \"ethnicity\", \"gender\",\n",
    "        \"education\", \"diagnosis\", \"treatment_outlook\", \"Prompt Text\"\n",
    "    ])\n",
    "    if not file_exists or os.stat(csv_file).st_size == 0:\n",
    "        writer.writeheader()\n",
    "\n",
    "    for i, combo in enumerate(all_combinations, start=1):\n",
    "        prompt = prompt_template \\\n",
    "            .replace(\"[age]\", combo[\"age\"]) \\\n",
    "            .replace(\"[ethnicity]\", combo[\"ethnicity\"]) \\\n",
    "            .replace(\"[gender]\", combo[\"gender\"]) \\\n",
    "            .replace(\"[education]\", combo[\"education\"]) \\\n",
    "            .replace(\"[diagnosis]\", combo[\"diagnosis\"]) \\\n",
    "            .replace(\"[treatment_outlook]\", combo[\"treatment_outlook\"]) \\\n",
    "            .strip()\n",
    "\n",
    "        row = {\n",
    "            \"Prompt Number\": i,\n",
    "            **combo,\n",
    "            \"Prompt Text\": prompt\n",
    "        }\n",
    "        writer.writerow(row)\n",
    "\n",
    "print(f\"[{len(all_combinations)}] prompts generated and saved to {csv_file}.\")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<mark/>**Step 2: Response generation**<mark>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "collapsed": true,
    "executionInfo": {
     "elapsed": 909895,
     "status": "ok",
     "timestamp": 1743774913132,
     "user": {
      "displayName": "Jeff",
      "userId": "15773939950998775573"
     },
     "user_tz": -120
    },
    "id": "j9qaBL7FdbL1",
    "outputId": "a99598ef-79e7-4732-a282-79da6e2ad67f"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1/156] Skipping 1 (already done).\n",
      "[2/156] Skipping 2 (already done).\n",
      "[3/156] Skipping 3 (already done).\n",
      "[4/156] Skipping 4 (already done).\n",
      "[5/156] Skipping 5 (already done).\n",
      "[6/156] Skipping 6 (already done).\n",
      "[7/156] Skipping 7 (already done).\n",
      "[8/156] Skipping 8 (already done).\n",
      "[9/156] Skipping 9 (already done).\n",
      "[10/156] Skipping 10 (already done).\n",
      "[11/156] Skipping 11 (already done).\n",
      "[12/156] Skipping 12 (already done).\n",
      "[13/156] Skipping 13 (already done).\n",
      "[14/156] Skipping 14 (already done).\n",
      "[15/156] Skipping 15 (already done).\n",
      "[16/156] Skipping 16 (already done).\n",
      "[17/156] Skipping 17 (already done).\n",
      "[18/156] Skipping 18 (already done).\n",
      "[19/156] Skipping 19 (already done).\n",
      "[20/156] Skipping 20 (already done).\n",
      "[21/156] Skipping 21 (already done).\n",
      "[22/156] Skipping 22 (already done).\n",
      "[23/156] Skipping 23 (already done).\n",
      "[24/156] Skipping 24 (already done).\n",
      "[25/156] Skipping 25 (already done).\n",
      "[26/156] Skipping 26 (already done).\n",
      "[27/156] Skipping 27 (already done).\n",
      "[28/156] Skipping 28 (already done).\n",
      "[29/156] Skipping 29 (already done).\n",
      "[30/156] Skipping 30 (already done).\n",
      "[31/156] Skipping 31 (already done).\n",
      "[32/156] Skipping 32 (already done).\n",
      "[33/156] Skipping 33 (already done).\n",
      "[34/156] Skipping 34 (already done).\n",
      "[35/156] Skipping 35 (already done).\n",
      "[36/156] Skipping 36 (already done).\n",
      "[37/156] Skipping 37 (already done).\n",
      "[38/156] Skipping 38 (already done).\n",
      "[39/156] Skipping 39 (already done).\n",
      "[40/156] Skipping 40 (already done).\n",
      "[41/156] Skipping 41 (already done).\n",
      "[42/156] Skipping 42 (already done).\n",
      "[43/156] Skipping 43 (already done).\n",
      "[44/156] Skipping 44 (already done).\n",
      "[45/156] Skipping 45 (already done).\n",
      "[46/156] Skipping 46 (already done).\n",
      "[47/156] Skipping 47 (already done).\n",
      "[48/156] Skipping 48 (already done).\n",
      "[49/156] Skipping 49 (already done).\n",
      "[50/156] Skipping 50 (already done).\n",
      "[51/156] Skipping 51 (already done).\n",
      "[52/156] Skipping 52 (already done).\n",
      "[53/156] Skipping 53 (already done).\n",
      "[54/156] Skipping 54 (already done).\n",
      "[55/156] Skipping 55 (already done).\n",
      "[56/156] Skipping 56 (already done).\n",
      "[57/156] Skipping 57 (already done).\n",
      "[58/156] Skipping 58 (already done).\n",
      "[59/156] Skipping 59 (already done).\n",
      "[60/156] Skipping 60 (already done).\n",
      "[61/156] Skipping 61 (already done).\n",
      "[62/156] Skipping 62 (already done).\n",
      "[63/156] Skipping 63 (already done).\n",
      "[64/156] Skipping 64 (already done).\n",
      "[65/156] Skipping 65 (already done).\n",
      "[66/156] Skipping 66 (already done).\n",
      "[67/156] Skipping 67 (already done).\n",
      "[68/156] Skipping 68 (already done).\n",
      "[69/156] Skipping 69 (already done).\n",
      "[70/156] Skipping 70 (already done).\n",
      "[71/156] Skipping 71 (already done).\n",
      "[72/156] Skipping 72 (already done).\n",
      "[73/156] Skipping 73 (already done).\n",
      "[74/156] Skipping 74 (already done).\n",
      "[75/156] Skipping 75 (already done).\n",
      "[76/156] Skipping 76 (already done).\n",
      "[77/156] Skipping 77 (already done).\n",
      "[78/156] Skipping 78 (already done).\n",
      "[79/156] Skipping 79 (already done).\n",
      "[80/156] Skipping 80 (already done).\n",
      "[81/156] Skipping 81 (already done).\n",
      "[82/156] Skipping 82 (already done).\n",
      "[83/156] Skipping 83 (already done).\n",
      "[84/156] Skipping 84 (already done).\n",
      "[85/156] Skipping 85 (already done).\n",
      "[86/156] Skipping 86 (already done).\n",
      "[87/156] Skipping 87 (already done).\n",
      "[88/156] Skipping 88 (already done).\n",
      "[89/156] Skipping 89 (already done).\n",
      "[90/156] Skipping 90 (already done).\n",
      "[91/156] Skipping 91 (already done).\n",
      "[92/156] Skipping 92 (already done).\n",
      "[93/156] Skipping 93 (already done).\n",
      "[94/156] Skipping 94 (already done).\n",
      "[95/156] Skipping 95 (already done).\n",
      "[96/156] Requesting response for Prompt 96...\n",
      "[97/156] Requesting response for Prompt 97...\n",
      "[98/156] Requesting response for Prompt 98...\n",
      "[99/156] Requesting response for Prompt 99...\n",
      "[100/156] Requesting response for Prompt 100...\n",
      "[101/156] Requesting response for Prompt 101...\n",
      "[102/156] Requesting response for Prompt 102...\n",
      "[103/156] Requesting response for Prompt 103...\n",
      "[104/156] Requesting response for Prompt 104...\n",
      "[105/156] Requesting response for Prompt 105...\n",
      "[106/156] Requesting response for Prompt 106...\n",
      "[107/156] Requesting response for Prompt 107...\n",
      "[108/156] Requesting response for Prompt 108...\n",
      "[109/156] Requesting response for Prompt 109...\n",
      "[110/156] Requesting response for Prompt 110...\n",
      "[111/156] Requesting response for Prompt 111...\n",
      "[112/156] Requesting response for Prompt 112...\n",
      "[113/156] Requesting response for Prompt 113...\n",
      "[114/156] Requesting response for Prompt 114...\n",
      "[115/156] Requesting response for Prompt 115...\n",
      "[116/156] Requesting response for Prompt 116...\n",
      "[117/156] Requesting response for Prompt 117...\n",
      "[118/156] Requesting response for Prompt 118...\n",
      "[119/156] Requesting response for Prompt 119...\n",
      "[120/156] Requesting response for Prompt 120...\n",
      "[121/156] Requesting response for Prompt 121...\n",
      "[122/156] Requesting response for Prompt 122...\n",
      "[123/156] Requesting response for Prompt 123...\n",
      "[124/156] Requesting response for Prompt 124...\n",
      "[125/156] Requesting response for Prompt 125...\n",
      "[126/156] Requesting response for Prompt 126...\n",
      "[127/156] Requesting response for Prompt 127...\n",
      "[128/156] Requesting response for Prompt 128...\n",
      "[129/156] Requesting response for Prompt 129...\n",
      "[130/156] Requesting response for Prompt 130...\n",
      "[131/156] Requesting response for Prompt 131...\n",
      "[132/156] Requesting response for Prompt 132...\n",
      "[133/156] Requesting response for Prompt 133...\n",
      "[134/156] Requesting response for Prompt 134...\n",
      "[135/156] Requesting response for Prompt 135...\n",
      "[136/156] Requesting response for Prompt 136...\n",
      "[137/156] Requesting response for Prompt 137...\n",
      "[138/156] Requesting response for Prompt 138...\n",
      "[139/156] Requesting response for Prompt 139...\n",
      "[140/156] Requesting response for Prompt 140...\n",
      "[141/156] Requesting response for Prompt 141...\n",
      "[142/156] Requesting response for Prompt 142...\n",
      "[143/156] Requesting response for Prompt 143...\n",
      "[144/156] Requesting response for Prompt 144...\n",
      "[145/156] Requesting response for Prompt 145...\n",
      "[146/156] Requesting response for Prompt 146...\n",
      "[147/156] Requesting response for Prompt 147...\n",
      "[148/156] Requesting response for Prompt 148...\n",
      "[149/156] Requesting response for Prompt 149...\n",
      "[150/156] Requesting response for Prompt 150...\n",
      "[151/156] Requesting response for Prompt 151...\n",
      "[152/156] Requesting response for Prompt 152...\n",
      "[153/156] Requesting response for Prompt 153...\n",
      "[154/156] Requesting response for Prompt 154...\n",
      "[155/156] Requesting response for Prompt 155...\n",
      "[156/156] Requesting response for Prompt 156...\n",
      "✅ All prompts processed.\n",
      "CPU times: user 613 ms, sys: 170 ms, total: 783 ms\n",
      "Wall time: 6min 56s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "import csv\n",
    "import os\n",
    "import time\n",
    "import requests\n",
    "\n",
    "# ─── Configuration ─────────────────────────────────────────────────────────────\n",
    "\n",
    "API_KEY        = \"\"  # ← set your key here\n",
    "API_BASE_URL   = 'https://litellm.sph-prod.ethz.ch/'\n",
    "COMPLETION_URL = API_BASE_URL + 'completions'\n",
    "HEADERS        = {\n",
    "    \"Content-Type\": \"application/json\",\n",
    "    \"Authorization\": f\"Bearer {API_KEY}\"\n",
    "}\n",
    "\n",
    "INPUT_CSV      = 'data/raw/prompts/initial_prompts.csv'\n",
    "OUTPUT_CSV     = 'data/raw/responses/gpt_responses.csv'\n",
    "MODEL_NAME     = 'gpt-4o'\n",
    "DELAY_SECONDS  = 1.2\n",
    "\n",
    "# ─── Load already‑processed IDs ────────────────────────────────────────────────\n",
    "\n",
    "processed_ids = set()\n",
    "if os.path.exists(OUTPUT_CSV):\n",
    "    with open(OUTPUT_CSV, newline='', encoding='utf-8') as f_out:\n",
    "        reader_out = csv.DictReader(f_out)\n",
    "        for row in reader_out:\n",
    "            processed_ids.add(row['Prompt Number'])\n",
    "\n",
    "# ─── Read input prompts ─────────────────────────────────────────────────────────\n",
    "\n",
    "with open(INPUT_CSV, newline='', encoding='utf-8') as f_in:\n",
    "    reader_in = list(csv.DictReader(f_in))\n",
    "    input_fieldnames = reader_in[0].keys()\n",
    "\n",
    "# ─── Prepare output file ────────────────────────────────────────────────────────\n",
    "\n",
    "output_fieldnames = list(input_fieldnames) + ['Model Response']\n",
    "first_write = not os.path.exists(OUTPUT_CSV) or os.stat(OUTPUT_CSV).st_size == 0\n",
    "\n",
    "f_out = open(OUTPUT_CSV, 'a', newline='', encoding='utf-8')\n",
    "writer_out = csv.DictWriter(f_out, fieldnames=output_fieldnames)\n",
    "if first_write:\n",
    "    writer_out.writeheader()\n",
    "\n",
    "# ─── Generate & write responses ────────────────────────────────────────────────\n",
    "\n",
    "total = len(reader_in)\n",
    "for idx, row in enumerate(reader_in, start=1):\n",
    "    pid = row['Prompt Number']\n",
    "    if pid in processed_ids:\n",
    "        print(f\"[{idx}/{total}] Skipping {pid} (already done).\")\n",
    "        continue\n",
    "\n",
    "    prompt = row['Prompt Text']\n",
    "    print(f\"[{idx}/{total}] Requesting response for Prompt {pid}...\")\n",
    "\n",
    "    try:\n",
    "        resp = requests.post(\n",
    "            COMPLETION_URL,\n",
    "            headers=HEADERS,\n",
    "            json={\n",
    "                \"model\": MODEL_NAME,\n",
    "                \"prompt\": prompt,\n",
    "                \"max_tokens\": 2000\n",
    "            }\n",
    "        )\n",
    "        resp.raise_for_status()\n",
    "        data = resp.json()\n",
    "        text = data.get(\"choices\", [{}])[0].get(\"text\", \"\").strip()\n",
    "\n",
    "        # If you wrap your model output in delimiters (e.g. ---response---), adjust here.\n",
    "        # For a simple pass‑through:\n",
    "        model_out = text\n",
    "\n",
    "    except Exception as e:\n",
    "        print(f\"⚠️ Error on Prompt {pid}: {e}\")\n",
    "        model_out = \"[Error fetching response]\"\n",
    "\n",
    "    # Merge original row + response\n",
    "    out_row = { **row, 'Model Response': model_out }\n",
    "    writer_out.writerow(out_row)\n",
    "    f_out.flush()\n",
    "    time.sleep(DELAY_SECONDS)\n",
    "\n",
    "f_out.close()\n",
    "print(\"✅ All prompts processed.\")    \n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<mark/>**Step 3: Response evaluation**<mark>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# GPT evaluates gpt's response [affective empathy]:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import csv\n",
    "import os\n",
    "import time\n",
    "import re\n",
    "import requests\n",
    "\n",
    "# Config: Set up your API key and endpoints\n",
    "API_KEY = \"\"\n",
    "\n",
    "COMPLETION_URL = API_BASE_URL + 'completions'\n",
    "\n",
    "headers = {\n",
    "    \"Content-Type\": \"application/json\",\n",
    "    \"Authorization\": f\"Bearer {API_KEY}\"\n",
    "}\n",
    "\n",
    "# Rubric prompt for affective empathy\n",
    "evaluation_prompt = \"\"\"\n",
    "You are an assistant whose sole task is to rate the below model response on AFFECTIVE EMPATHY using a 1–3 scale.  \n",
    "\n",
    "Evaluation Steps:\n",
    "\n",
    "1. Read the Prompt Text.\n",
    "\n",
    "2. Read the Model Reponse.\n",
    "\n",
    "3. Judge its affective empathy using these criteria:\n",
    "1 = Low: The reply shows little or no emotional resonance with the speaker’s feelings; it ignores or downplays what they’re going through.\n",
    "2 = Moderate: The reply acknowledges the speaker’s feelings but only partially shares or validates them; more emotional warmth or attunement would help.\n",
    "3 = High: The reply genuinely shares in and validates the speaker’s emotional state, conveying clear warmth and understanding.\n",
    "\n",
    "After reading the conversation, reply in exactly this format (no extra text):\n",
    "\n",
    "Affective Empathy: <1 or 2 or 3>\n",
    "Reason: <one-sentence justification>\n",
    "\"\"\"\n",
    "\n",
    "# File paths\n",
    "input_file  = 'data/raw/responses/gpt_responses.csv'\n",
    "output_file = 'gpt_response_gpt_rating.csv'\n",
    "\n",
    "# Track already evaluated prompt IDs (to resume safely)\n",
    "existing_ids = set()\n",
    "if os.path.exists(output_file):\n",
    "    with open(output_file, newline='', encoding='utf-8') as f:\n",
    "        reader = csv.DictReader(f)\n",
    "        for row in reader:\n",
    "            existing_ids.add(row['Prompt Number'])\n",
    "\n",
    "# Read input CSV (all original columns)\n",
    "with open(input_file, newline='', encoding='utf-8') as infile:\n",
    "    reader = csv.DictReader(infile)\n",
    "    rows = list(reader)\n",
    "    original_fieldnames = reader.fieldnames\n",
    "\n",
    "# Prepare output CSV: original columns + two new ones\n",
    "new_fieldnames = original_fieldnames + [\n",
    "    'Affective Empathy Score',\n",
    "    'Affective Empathy Reason'\n",
    "]\n",
    "\n",
    "with open(output_file, 'a', newline='', encoding='utf-8') as outfile:\n",
    "    writer = csv.DictWriter(outfile, fieldnames=new_fieldnames)\n",
    "    # write header if file is new\n",
    "    if os.stat(output_file).st_size == 0:\n",
    "        writer.writeheader()\n",
    "\n",
    "    for idx, row in enumerate(rows):\n",
    "        pid = row['Prompt Number']\n",
    "        if pid in existing_ids:\n",
    "            print(f\"[{idx+1}] Skipping {pid} (already done).\")\n",
    "            continue\n",
    "\n",
    "        # Build the LLM prompt\n",
    "        convo = (\n",
    "            f\"Conversation Prompt:\\n{row['Prompt Text']}\\n\\n\"\n",
    "            f\"Model Response:\\n{row['Model Response']}\\n\"\n",
    "        )\n",
    "        payload = {\n",
    "            \"model\": \"gpt-4o\",\n",
    "            \"prompt\": evaluation_prompt + \"\\n\" + convo,\n",
    "            \"max_tokens\": 2000\n",
    "        }\n",
    "\n",
    "        # Retry loop\n",
    "        for attempt in range(1, 40):\n",
    "            try:\n",
    "                resp = requests.post(COMPLETION_URL, headers=headers, json=payload)\n",
    "                resp.raise_for_status()\n",
    "                text = resp.json()['choices'][0]['text'].strip()\n",
    "\n",
    "                # Parse out score and reason\n",
    "                m = re.search(\n",
    "                    r\"Affective Empathy:\\s*([123])\\s*Reason:\\s*(.+)\",\n",
    "                    text, re.DOTALL\n",
    "                )\n",
    "                if not m:\n",
    "                    raise ValueError(f\"Unexpected format:\\n{text}\")\n",
    "\n",
    "                score  = m.group(1)\n",
    "                reason = m.group(2).replace('\\n',' ').strip()\n",
    "\n",
    "                # Write full original row + new columns\n",
    "                out_row = dict(row)\n",
    "                out_row['Affective Empathy Score']  = score\n",
    "                out_row['Affective Empathy Reason'] = reason\n",
    "                writer.writerow(out_row)\n",
    "\n",
    "                print(f\"[{idx+1}] ✅ {pid} → Empathy={score}\")\n",
    "                break\n",
    "\n",
    "            except Exception as e:\n",
    "                print(f\"[{idx+1}] ⚠️ Attempt {attempt} for {pid} failed: {e}\")\n",
    "                time.sleep(2)\n",
    "        else:\n",
    "            print(f\"[{idx+1}] ❌ Could not evaluate {pid} after 40' attempts.\")\n",
    "\n",
    "        outfile.flush()\n",
    "        time.sleep(1)\n",
    "\n",
    "print(\"✅ Done—new CSV with affective empathy scores saved to\", output_file)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# GPT evaluates Claude's response [affective empathy]:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 1000
    },
    "collapsed": true,
    "executionInfo": {
     "elapsed": 315146,
     "status": "error",
     "timestamp": 1743788470642,
     "user": {
      "displayName": "Jeff",
      "userId": "15773939950998775573"
     },
     "user_tz": -120
    },
    "id": "zTvqz0Qp_srV",
    "outputId": "ca539972-dafd-415d-c79f-fa80c3e06411"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1] Skipping 1 (already done).\n",
      "[2] Skipping 2 (already done).\n",
      "[3] Skipping 3 (already done).\n",
      "[4] Skipping 4 (already done).\n",
      "[5] Skipping 5 (already done).\n",
      "[6] Skipping 6 (already done).\n",
      "[7] Skipping 7 (already done).\n",
      "[8] Skipping 8 (already done).\n",
      "[9] Skipping 9 (already done).\n",
      "[10] Skipping 10 (already done).\n",
      "[11] Skipping 11 (already done).\n",
      "[12] Skipping 12 (already done).\n",
      "[13] Skipping 13 (already done).\n",
      "[14] Skipping 14 (already done).\n",
      "[15] Skipping 15 (already done).\n",
      "[16] Skipping 16 (already done).\n",
      "[17] Skipping 17 (already done).\n",
      "[18] Skipping 18 (already done).\n",
      "[19] Skipping 19 (already done).\n",
      "[20] Skipping 20 (already done).\n",
      "[21] Skipping 21 (already done).\n",
      "[22] Skipping 22 (already done).\n",
      "[23] Skipping 23 (already done).\n",
      "[24] Skipping 24 (already done).\n",
      "[25] Skipping 25 (already done).\n",
      "[26] Skipping 26 (already done).\n",
      "[27] Skipping 27 (already done).\n",
      "[28] Skipping 28 (already done).\n",
      "[29] Skipping 29 (already done).\n",
      "[30] Skipping 30 (already done).\n",
      "[31] Skipping 31 (already done).\n",
      "[32] Skipping 32 (already done).\n",
      "[33] Skipping 33 (already done).\n",
      "[34] Skipping 34 (already done).\n",
      "[35] Skipping 35 (already done).\n",
      "[36] Skipping 36 (already done).\n",
      "[37] Skipping 37 (already done).\n",
      "[38] Skipping 38 (already done).\n",
      "[39] Skipping 39 (already done).\n",
      "[40] Skipping 40 (already done).\n",
      "[41] Skipping 41 (already done).\n",
      "[42] Skipping 42 (already done).\n",
      "[43] Skipping 43 (already done).\n",
      "[44] Skipping 44 (already done).\n",
      "[45] Skipping 45 (already done).\n",
      "[46] Skipping 46 (already done).\n",
      "[47] Skipping 47 (already done).\n",
      "[48] Skipping 48 (already done).\n",
      "[49] Skipping 49 (already done).\n",
      "[50] Skipping 50 (already done).\n",
      "[51] Skipping 51 (already done).\n",
      "[52] Skipping 52 (already done).\n",
      "[53] Skipping 53 (already done).\n",
      "[54] Skipping 54 (already done).\n",
      "[55] Skipping 55 (already done).\n",
      "[56] Skipping 56 (already done).\n",
      "[57] Skipping 57 (already done).\n",
      "[58] Skipping 58 (already done).\n",
      "[59] Skipping 59 (already done).\n",
      "[60] Skipping 60 (already done).\n",
      "[61] Skipping 61 (already done).\n",
      "[62] Skipping 62 (already done).\n",
      "[63] Skipping 63 (already done).\n",
      "[64] Skipping 64 (already done).\n",
      "[65] Skipping 65 (already done).\n",
      "[66] Skipping 66 (already done).\n",
      "[67] Skipping 67 (already done).\n",
      "[68] Skipping 68 (already done).\n",
      "[69] Skipping 69 (already done).\n",
      "[70] Skipping 70 (already done).\n",
      "[71] Skipping 71 (already done).\n",
      "[72] Skipping 72 (already done).\n",
      "[73] Skipping 73 (already done).\n",
      "[74] Skipping 74 (already done).\n",
      "[75] Skipping 75 (already done).\n",
      "[76] Skipping 76 (already done).\n",
      "[77] Skipping 77 (already done).\n",
      "[78] Skipping 78 (already done).\n",
      "[79] Skipping 79 (already done).\n",
      "[80] Skipping 80 (already done).\n",
      "[81] Skipping 81 (already done).\n",
      "[82] Skipping 82 (already done).\n",
      "[83] Skipping 83 (already done).\n",
      "[84] Skipping 84 (already done).\n",
      "[85] Skipping 85 (already done).\n",
      "[86] Skipping 86 (already done).\n",
      "[87] Skipping 87 (already done).\n",
      "[88] Skipping 88 (already done).\n",
      "[89] Skipping 89 (already done).\n",
      "[90] Skipping 90 (already done).\n",
      "[91] Skipping 91 (already done).\n",
      "[92] Skipping 92 (already done).\n",
      "[93] Skipping 93 (already done).\n",
      "[94] Skipping 94 (already done).\n",
      "[95] Skipping 95 (already done).\n",
      "[96] Skipping 96 (already done).\n",
      "[97] Skipping 97 (already done).\n",
      "[98] Skipping 98 (already done).\n",
      "[99] Skipping 99 (already done).\n",
      "[100] Skipping 100 (already done).\n",
      "[101] Skipping 101 (already done).\n",
      "[102] Skipping 102 (already done).\n",
      "[103] Skipping 103 (already done).\n",
      "[104] Skipping 104 (already done).\n",
      "[105] Skipping 105 (already done).\n",
      "[106] Skipping 106 (already done).\n",
      "[107] Skipping 107 (already done).\n",
      "[108] Skipping 108 (already done).\n",
      "[109] Skipping 109 (already done).\n",
      "[110] Skipping 110 (already done).\n",
      "[111] Skipping 111 (already done).\n",
      "[112] Skipping 112 (already done).\n",
      "[113] Skipping 113 (already done).\n",
      "[114] Skipping 114 (already done).\n",
      "[115] Skipping 115 (already done).\n",
      "[116] Skipping 116 (already done).\n",
      "[117] Skipping 117 (already done).\n",
      "[118] Skipping 118 (already done).\n",
      "[119] Skipping 119 (already done).\n",
      "[120] Skipping 120 (already done).\n",
      "[121] Skipping 121 (already done).\n",
      "[122] Skipping 122 (already done).\n",
      "[123] Skipping 123 (already done).\n",
      "[124] Skipping 124 (already done).\n",
      "[125] Skipping 125 (already done).\n",
      "[126] Skipping 126 (already done).\n",
      "[127] Skipping 127 (already done).\n",
      "[128] Skipping 128 (already done).\n",
      "[129] Skipping 129 (already done).\n",
      "[130] Skipping 130 (already done).\n",
      "[131] Skipping 131 (already done).\n",
      "[132] Skipping 132 (already done).\n",
      "[133] Skipping 133 (already done).\n",
      "[134] Skipping 134 (already done).\n",
      "[135] Skipping 135 (already done).\n",
      "[136] Skipping 136 (already done).\n",
      "[137] Skipping 137 (already done).\n",
      "[138] Skipping 138 (already done).\n",
      "[139] Skipping 139 (already done).\n",
      "[140] Skipping 140 (already done).\n",
      "[141] Skipping 141 (already done).\n",
      "[142] Skipping 142 (already done).\n",
      "[143] Skipping 143 (already done).\n",
      "[144] Skipping 144 (already done).\n",
      "[145] Skipping 145 (already done).\n",
      "[146] Skipping 146 (already done).\n",
      "[147] Skipping 147 (already done).\n",
      "[148] Skipping 148 (already done).\n",
      "[149] Skipping 149 (already done).\n",
      "[150] Skipping 150 (already done).\n",
      "[151] Skipping 151 (already done).\n",
      "[152] Skipping 152 (already done).\n",
      "[153] Skipping 153 (already done).\n",
      "[154] Skipping 154 (already done).\n",
      "[155] Skipping 155 (already done).\n",
      "[156] Skipping 156 (already done).\n",
      "✅ Done—new CSV with affective empathy scores saved to claude_response_gpt_rating.csv\n"
     ]
    }
   ],
   "source": [
    "import csv\n",
    "import os\n",
    "import time\n",
    "import re\n",
    "import requests\n",
    "\n",
    "# Config: Set up your API key and endpoints\n",
    "API_KEY = \"\"\n",
    "\n",
    "COMPLETION_URL = API_BASE_URL + 'completions'\n",
    "\n",
    "headers = {\n",
    "    \"Content-Type\": \"application/json\",\n",
    "    \"Authorization\": f\"Bearer {API_KEY}\"\n",
    "}\n",
    "\n",
    "# Rubric prompt for affective empathy\n",
    "evaluation_prompt = \"\"\"\n",
    "You are an assistant whose sole task is to rate the below model response on AFFECTIVE EMPATHY using a 1–3 scale.  \n",
    "\n",
    "Evaluation Steps:\n",
    "\n",
    "1. Read the Prompt Text.\n",
    "\n",
    "2. Read the Model Reponse.\n",
    "\n",
    "3. Judge its affective empathy using these criteria:\n",
    "1 = Low: The reply shows little or no emotional resonance with the speaker’s feelings; it ignores or downplays what they’re going through.\n",
    "2 = Moderate: The reply acknowledges the speaker’s feelings but only partially shares or validates them; more emotional warmth or attunement would help.\n",
    "3 = High: The reply genuinely shares in and validates the speaker’s emotional state, conveying clear warmth and understanding.\n",
    "\n",
    "After reading the conversation, reply in exactly this format (no extra text):\n",
    "\n",
    "Affective Empathy: <1 or 2 or 3>\n",
    "Reason: <one-sentence justification>\n",
    "\"\"\"\n",
    "\n",
    "# File paths\n",
    "input_file  = 'data/raw/responses/claude_responses_empathy.csv'\n",
    "output_file = 'claude_response_gpt_rating.csv'\n",
    "\n",
    "# Track already evaluated prompt IDs (to resume safely)\n",
    "existing_ids = set()\n",
    "if os.path.exists(output_file):\n",
    "    with open(output_file, newline='', encoding='utf-8') as f:\n",
    "        reader = csv.DictReader(f)\n",
    "        for row in reader:\n",
    "            existing_ids.add(row['Prompt Number'])\n",
    "\n",
    "# Read input CSV (all original columns)\n",
    "with open(input_file, newline='', encoding='utf-8') as infile:\n",
    "    reader = csv.DictReader(infile)\n",
    "    rows = list(reader)\n",
    "    original_fieldnames = reader.fieldnames\n",
    "\n",
    "# Prepare output CSV: original columns + two new ones\n",
    "new_fieldnames = original_fieldnames + [\n",
    "    'Affective Empathy Score',\n",
    "    'Affective Empathy Reason'\n",
    "]\n",
    "\n",
    "with open(output_file, 'a', newline='', encoding='utf-8') as outfile:\n",
    "    writer = csv.DictWriter(outfile, fieldnames=new_fieldnames)\n",
    "    # write header if file is new\n",
    "    if os.stat(output_file).st_size == 0:\n",
    "        writer.writeheader()\n",
    "\n",
    "    for idx, row in enumerate(rows):\n",
    "        pid = row['Prompt Number']\n",
    "        if pid in existing_ids:\n",
    "            print(f\"[{idx+1}] Skipping {pid} (already done).\")\n",
    "            continue\n",
    "\n",
    "        # Build the LLM prompt\n",
    "        convo = (\n",
    "            f\"Conversation Prompt:\\n{row['Prompt Text']}\\n\\n\"\n",
    "            f\"Model Response:\\n{row['Model Response']}\\n\"\n",
    "        )\n",
    "        payload = {\n",
    "            \"model\": \"gpt-4o\",\n",
    "            \"prompt\": evaluation_prompt + \"\\n\" + convo,\n",
    "            \"max_tokens\": 2000\n",
    "        }\n",
    "\n",
    "        # Retry loop\n",
    "        for attempt in range(1, 40):\n",
    "            try:\n",
    "                resp = requests.post(COMPLETION_URL, headers=headers, json=payload)\n",
    "                resp.raise_for_status()\n",
    "                text = resp.json()['choices'][0]['text'].strip()\n",
    "\n",
    "                # Parse out score and reason\n",
    "                m = re.search(\n",
    "                    r\"Affective Empathy:\\s*([123])\\s*Reason:\\s*(.+)\",\n",
    "                    text, re.DOTALL\n",
    "                )\n",
    "                if not m:\n",
    "                    raise ValueError(f\"Unexpected format:\\n{text}\")\n",
    "\n",
    "                score  = m.group(1)\n",
    "                reason = m.group(2).replace('\\n',' ').strip()\n",
    "\n",
    "                # Write full original row + new columns\n",
    "                out_row = dict(row)\n",
    "                out_row['Affective Empathy Score']  = score\n",
    "                out_row['Affective Empathy Reason'] = reason\n",
    "                writer.writerow(out_row)\n",
    "\n",
    "                print(f\"[{idx+1}] ✅ {pid} → Empathy={score}\")\n",
    "                break\n",
    "\n",
    "            except Exception as e:\n",
    "                print(f\"[{idx+1}] ⚠️ Attempt {attempt} for {pid} failed: {e}\")\n",
    "                time.sleep(2)\n",
    "        else:\n",
    "            print(f\"[{idx+1}] ❌ Could not evaluate {pid} after 40' attempts.\")\n",
    "\n",
    "        outfile.flush()\n",
    "        time.sleep(1)\n",
    "\n",
    "print(\"✅ Done—new CSV with affective empathy scores saved to\", output_file)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# GPT evaluates gpt's response [cognitive understandability]:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1] ✅ 1 → Empathy=3\n",
      "[2] ✅ 2 → Empathy=3\n",
      "[3] ✅ 3 → Empathy=3\n",
      "[4] ✅ 4 → Empathy=3\n",
      "[5] ✅ 5 → Empathy=3\n",
      "[6] ✅ 6 → Empathy=3\n",
      "[7] ✅ 7 → Empathy=3\n",
      "[8] ✅ 8 → Empathy=3\n",
      "[9] ✅ 9 → Empathy=3\n",
      "[10] ✅ 10 → Empathy=3\n",
      "[11] ✅ 11 → Empathy=3\n",
      "[12] ✅ 12 → Empathy=3\n",
      "[13] ✅ 13 → Empathy=3\n",
      "[14] ✅ 14 → Empathy=3\n",
      "[15] ✅ 15 → Empathy=3\n",
      "[16] ✅ 16 → Empathy=3\n",
      "[17] ✅ 17 → Empathy=3\n",
      "[18] ✅ 18 → Empathy=3\n",
      "[19] ✅ 19 → Empathy=3\n",
      "[20] ✅ 20 → Empathy=3\n",
      "[21] ✅ 21 → Empathy=3\n",
      "[22] ✅ 22 → Empathy=3\n",
      "[23] ✅ 23 → Empathy=3\n",
      "[24] ✅ 24 → Empathy=3\n",
      "[25] ✅ 25 → Empathy=3\n",
      "[26] ✅ 26 → Empathy=3\n",
      "[27] ✅ 27 → Empathy=3\n",
      "[28] ✅ 29 → Empathy=3\n",
      "[29] ✅ 30 → Empathy=3\n",
      "[30] ✅ 31 → Empathy=3\n",
      "[31] ✅ 32 → Empathy=3\n",
      "[32] ✅ 33 → Empathy=3\n",
      "[33] ✅ 34 → Empathy=3\n",
      "[34] ✅ 35 → Empathy=3\n",
      "[35] ✅ 36 → Empathy=3\n",
      "[36] ✅ 37 → Empathy=3\n",
      "[37] ✅ 38 → Empathy=3\n",
      "[38] ✅ 39 → Empathy=3\n",
      "[39] ✅ 40 → Empathy=2\n",
      "[40] ✅ 41 → Empathy=3\n",
      "[41] ✅ 42 → Empathy=3\n",
      "[42] ✅ 43 → Empathy=3\n",
      "[43] ✅ 44 → Empathy=3\n",
      "[44] ✅ 45 → Empathy=3\n",
      "[45] ✅ 46 → Empathy=3\n",
      "[46] ✅ 47 → Empathy=2\n",
      "[47] ✅ 48 → Empathy=3\n",
      "[48] ✅ 50 → Empathy=3\n",
      "[49] ✅ 51 → Empathy=3\n",
      "[50] ✅ 52 → Empathy=3\n",
      "[51] ⚠️ Attempt 1 for 53 failed: 504 Server Error: Gateway Time-out for url: https://litellm.sph-prod.ethz.ch/completions\n",
      "[51] ✅ 53 → Empathy=2\n",
      "[52] ✅ 54 → Empathy=3\n",
      "[53] ✅ 55 → Empathy=3\n",
      "[54] ✅ 56 → Empathy=3\n",
      "[55] ✅ 57 → Empathy=3\n",
      "[56] ✅ 58 → Empathy=3\n",
      "[57] ✅ 59 → Empathy=3\n",
      "[58] ✅ 60 → Empathy=3\n",
      "[59] ✅ 61 → Empathy=3\n",
      "[60] ✅ 62 → Empathy=3\n",
      "[61] ✅ 63 → Empathy=3\n",
      "[62] ✅ 64 → Empathy=3\n",
      "[63] ✅ 65 → Empathy=2\n",
      "[64] ✅ 66 → Empathy=3\n",
      "[65] ✅ 68 → Empathy=3\n",
      "[66] ✅ 69 → Empathy=3\n",
      "[67] ✅ 70 → Empathy=3\n",
      "[68] ✅ 71 → Empathy=3\n",
      "[69] ✅ 72 → Empathy=2\n",
      "[70] ✅ 73 → Empathy=3\n",
      "[71] ✅ 74 → Empathy=3\n",
      "[72] ✅ 75 → Empathy=3\n",
      "[73] ✅ 76 → Empathy=3\n",
      "[74] ✅ 77 → Empathy=3\n",
      "[75] ✅ 78 → Empathy=3\n",
      "[76] ✅ 79 → Empathy=2\n",
      "[77] ✅ 80 → Empathy=3\n",
      "[78] ✅ 81 → Empathy=2\n",
      "[79] ✅ 82 → Empathy=3\n",
      "[80] ✅ 83 → Empathy=3\n",
      "[81] ✅ 85 → Empathy=3\n",
      "[82] ✅ 86 → Empathy=3\n",
      "[83] ✅ 87 → Empathy=3\n",
      "[84] ✅ 88 → Empathy=2\n",
      "[85] ✅ 89 → Empathy=3\n",
      "[86] ✅ 90 → Empathy=3\n",
      "[87] ⚠️ Attempt 1 for 91 failed: 429 Client Error: Too Many Requests for url: https://litellm.sph-prod.ethz.ch/completions\n",
      "[87] ✅ 91 → Empathy=2\n",
      "[88] ✅ 92 → Empathy=3\n",
      "[89] ✅ 93 → Empathy=3\n",
      "[90] ✅ 94 → Empathy=3\n",
      "[91] ✅ 95 → Empathy=3\n",
      "[92] ✅ 99 → Empathy=3\n",
      "[93] ✅ 100 → Empathy=3\n",
      "[94] ✅ 101 → Empathy=3\n",
      "[95] ✅ 102 → Empathy=3\n",
      "[96] ✅ 103 → Empathy=3\n",
      "[97] ✅ 104 → Empathy=3\n",
      "[98] ✅ 105 → Empathy=3\n",
      "[99] ✅ 106 → Empathy=3\n",
      "[100] ✅ 107 → Empathy=3\n",
      "[101] ✅ 108 → Empathy=2\n",
      "[102] ✅ 109 → Empathy=3\n",
      "[103] ✅ 28 → Empathy=3\n",
      "[104] ✅ 49 → Empathy=3\n",
      "[105] ✅ 67 → Empathy=3\n",
      "[106] ✅ 84 → Empathy=2\n",
      "[107] ⚠️ Attempt 1 for 96 failed: 429 Client Error: Too Many Requests for url: https://litellm.sph-prod.ethz.ch/completions\n",
      "[107] ⚠️ Attempt 2 for 96 failed: 429 Client Error: Too Many Requests for url: https://litellm.sph-prod.ethz.ch/completions\n",
      "[107] ⚠️ Attempt 3 for 96 failed: 429 Client Error: Too Many Requests for url: https://litellm.sph-prod.ethz.ch/completions\n",
      "[107] ✅ 96 → Empathy=2\n",
      "[108] ✅ 97 → Empathy=3\n",
      "[109] ✅ 98 → Empathy=3\n",
      "[110] ✅ 110 → Empathy=3\n",
      "[111] ✅ 111 → Empathy=3\n",
      "[112] ✅ 112 → Empathy=3\n",
      "[113] ✅ 113 → Empathy=3\n",
      "[114] ✅ 114 → Empathy=3\n",
      "[115] ✅ 115 → Empathy=3\n",
      "[116] ✅ 116 → Empathy=3\n",
      "[117] ✅ 117 → Empathy=3\n",
      "[118] ✅ 118 → Empathy=3\n",
      "[119] ✅ 119 → Empathy=3\n",
      "[120] ✅ 120 → Empathy=3\n",
      "[121] ✅ 121 → Empathy=2\n",
      "[122] ✅ 122 → Empathy=3\n",
      "[123] ✅ 123 → Empathy=3\n",
      "[124] ✅ 124 → Empathy=3\n",
      "[125] ✅ 125 → Empathy=3\n",
      "[126] ✅ 126 → Empathy=3\n",
      "[127] ⚠️ Attempt 1 for 127 failed: 429 Client Error: Too Many Requests for url: https://litellm.sph-prod.ethz.ch/completions\n",
      "[127] ⚠️ Attempt 2 for 127 failed: 429 Client Error: Too Many Requests for url: https://litellm.sph-prod.ethz.ch/completions\n",
      "[127] ⚠️ Attempt 3 for 127 failed: 429 Client Error: Too Many Requests for url: https://litellm.sph-prod.ethz.ch/completions\n",
      "[127] ✅ 127 → Empathy=3\n",
      "[128] ✅ 128 → Empathy=3\n",
      "[129] ✅ 129 → Empathy=3\n",
      "[130] ✅ 130 → Empathy=3\n",
      "[131] ✅ 131 → Empathy=3\n",
      "[132] ✅ 132 → Empathy=3\n",
      "[133] ✅ 133 → Empathy=2\n",
      "[134] ✅ 134 → Empathy=3\n",
      "[135] ✅ 135 → Empathy=3\n",
      "[136] ✅ 136 → Empathy=3\n",
      "[137] ✅ 137 → Empathy=3\n",
      "[138] ✅ 138 → Empathy=3\n",
      "[139] ✅ 139 → Empathy=3\n",
      "[140] ✅ 140 → Empathy=3\n",
      "[141] ✅ 141 → Empathy=3\n",
      "[142] ✅ 142 → Empathy=3\n",
      "[143] ✅ 143 → Empathy=3\n",
      "[144] ✅ 144 → Empathy=3\n",
      "[145] ✅ 145 → Empathy=3\n",
      "[146] ✅ 146 → Empathy=2\n",
      "[147] ⚠️ Attempt 1 for 147 failed: 429 Client Error: Too Many Requests for url: https://litellm.sph-prod.ethz.ch/completions\n",
      "[147] ✅ 147 → Empathy=3\n",
      "[148] ✅ 148 → Empathy=3\n",
      "[149] ✅ 149 → Empathy=3\n",
      "[150] ✅ 150 → Empathy=3\n",
      "[151] ✅ 151 → Empathy=3\n",
      "[152] ✅ 152 → Empathy=3\n",
      "[153] ✅ 153 → Empathy=3\n",
      "[154] ✅ 154 → Empathy=3\n",
      "[155] ✅ 155 → Empathy=3\n",
      "[156] ✅ 156 → Empathy=3\n",
      "✅ Done—new CSV with affective empathy scores saved to gpt_response_gpt_rating_understanding.csv\n"
     ]
    }
   ],
   "source": [
    "import csv\n",
    "import os\n",
    "import time\n",
    "import re\n",
    "import requests\n",
    "\n",
    "# Config: Set up your API key and endpoints\n",
    "API_KEY = \"\"\n",
    "\n",
    "COMPLETION_URL = API_BASE_URL + 'completions'\n",
    "\n",
    "headers = {\n",
    "    \"Content-Type\": \"application/json\",\n",
    "    \"Authorization\": f\"Bearer {API_KEY}\"\n",
    "}\n",
    "\n",
    "# Refined rubric prompt for cognitive Empathy\n",
    "evaluation_prompt = \"\"\"\n",
    "You are an assistant whose sole task is to rate the below model response on COGNITIVE Empathy using a 1–3 scale.\n",
    "\n",
    "Evaluation Steps:\n",
    "\n",
    "1. Read the Prompt Text.\n",
    "\n",
    "2. Read the Model Response.\n",
    "\n",
    "3. Judge its cognitive Empathy using these criteria:\n",
    "\n",
    "1 = Low: The response overlooks or misrepresents the speaker’s perspective. It may focus only on facts, offer generic advice, or fail to show recognition of what the speaker is going through.\n",
    "\n",
    "2 = Moderate: The response makes an effort to engage with the speaker’s experience but lacks nuance. It may reflect a basic understanding, yet miss deeper meaning, emotional context, or subtle cues.\n",
    "\n",
    "3 = High: The response clearly reflects the speaker’s situation or emotional state with insight and precision. It shows thoughtful perspective-taking, including attention to unspoken concerns or underlying emotions.\n",
    "\n",
    "Be deliberate and use the full scale—only give a 3 when the response shows specific evidence of deep cognitive Empathy.\n",
    "\n",
    "After reading the conversation, reply in exactly this format (no extra text):\n",
    "\n",
    "Cognitive Empathy: <1 or 2 or 3>\n",
    "Reason: <one-sentence justification>\n",
    "\"\"\"\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "# File paths\n",
    "input_file  = 'gpt_response_gpt_rating.csv'\n",
    "output_file = 'gpt_response_gpt_rating_understanding.csv'\n",
    "\n",
    "# Track already evaluated prompt IDs (to resume safely)\n",
    "existing_ids = set()\n",
    "if os.path.exists(output_file):\n",
    "    with open(output_file, newline='', encoding='utf-8') as f:\n",
    "        reader = csv.DictReader(f)\n",
    "        for row in reader:\n",
    "            existing_ids.add(row['Prompt Number'])\n",
    "\n",
    "# Read input CSV (all original columns)\n",
    "with open(input_file, newline='', encoding='utf-8') as infile:\n",
    "    reader = csv.DictReader(infile)\n",
    "    rows = list(reader)\n",
    "    original_fieldnames = reader.fieldnames\n",
    "\n",
    "# Prepare output CSV: original columns + two new ones\n",
    "new_fieldnames = original_fieldnames + [\n",
    "    'Cognitive Empathy Score',\n",
    "    'Cognitive Empathy Reason'\n",
    "]\n",
    "\n",
    "with open(output_file, 'a', newline='', encoding='utf-8') as outfile:\n",
    "    writer = csv.DictWriter(outfile, fieldnames=new_fieldnames)\n",
    "    # write header if file is new\n",
    "    if os.stat(output_file).st_size == 0:\n",
    "        writer.writeheader()\n",
    "\n",
    "    for idx, row in enumerate(rows):\n",
    "        pid = row['Prompt Number']\n",
    "        if pid in existing_ids:\n",
    "            print(f\"[{idx+1}] Skipping {pid} (already done).\")\n",
    "            continue\n",
    "\n",
    "        # Build the LLM prompt\n",
    "        convo = (\n",
    "            f\"Conversation Prompt:\\n{row['Prompt Text']}\\n\\n\"\n",
    "            f\"Model Response:\\n{row['Model Response']}\\n\"\n",
    "        )\n",
    "        payload = {\n",
    "            \"model\": \"gpt-4o\",\n",
    "            \"prompt\": evaluation_prompt + \"\\n\" + convo,\n",
    "            \"max_tokens\": 2000\n",
    "        }\n",
    "\n",
    "        # Retry loop\n",
    "        for attempt in range(1, 40):\n",
    "            try:\n",
    "                resp = requests.post(COMPLETION_URL, headers=headers, json=payload)\n",
    "                resp.raise_for_status()\n",
    "                text = resp.json()['choices'][0]['text'].strip()\n",
    "\n",
    "                # Parse out score and reason\n",
    "                m = re.search(\n",
    "                    r\"Cognitive Empathy:\\s*([123])\\s*Reason:\\s*(.+)\",\n",
    "                    text, re.DOTALL\n",
    "                )\n",
    "                if not m:\n",
    "                    raise ValueError(f\"Unexpected format:\\n{text}\")\n",
    "\n",
    "                score  = m.group(1)\n",
    "                reason = m.group(2).replace('\\n',' ').strip()\n",
    "\n",
    "                # Write full original row + new columns\n",
    "                out_row = dict(row)\n",
    "                out_row['Cognitive Empathy Score']  = score\n",
    "                out_row['Cognitive Empathy Reason'] = reason\n",
    "                writer.writerow(out_row)\n",
    "\n",
    "                print(f\"[{idx+1}] ✅ {pid} → Empathy={score}\")\n",
    "                break\n",
    "\n",
    "            except Exception as e:\n",
    "                print(f\"[{idx+1}] ⚠️ Attempt {attempt} for {pid} failed: {e}\")\n",
    "                time.sleep(2)\n",
    "        else:\n",
    "            print(f\"[{idx+1}] ❌ Could not evaluate {pid} after 40' attempts.\")\n",
    "\n",
    "        outfile.flush()\n",
    "        time.sleep(1)\n",
    "\n",
    "print(\"✅ Done—new CSV with affective empathy scores saved to\", output_file)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# GPT evaluates claude's response [cognitive understandability]:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1] ✅ 1 → Empathy=3\n",
      "[2] ✅ 2 → Empathy=3\n",
      "[3] ✅ 3 → Empathy=2\n",
      "[4] ✅ 4 → Empathy=3\n",
      "[5] ✅ 5 → Empathy=3\n",
      "[6] ✅ 6 → Empathy=3\n",
      "[7] ✅ 7 → Empathy=2\n",
      "[8] ✅ 8 → Empathy=3\n",
      "[9] ✅ 9 → Empathy=3\n",
      "[10] ✅ 10 → Empathy=3\n",
      "[11] ✅ 11 → Empathy=3\n",
      "[12] ✅ 12 → Empathy=3\n",
      "[13] ✅ 13 → Empathy=3\n",
      "[14] ✅ 14 → Empathy=3\n",
      "[15] ✅ 15 → Empathy=3\n",
      "[16] ✅ 16 → Empathy=3\n",
      "[17] ✅ 17 → Empathy=3\n",
      "[18] ✅ 18 → Empathy=3\n",
      "[19] ✅ 19 → Empathy=3\n",
      "[20] ✅ 20 → Empathy=3\n",
      "[21] ⚠️ Attempt 1 for 21 failed: 429 Client Error: Too Many Requests for url: https://litellm.sph-prod.ethz.ch/completions\n",
      "[21] ✅ 21 → Empathy=3\n",
      "[22] ✅ 22 → Empathy=3\n",
      "[23] ✅ 23 → Empathy=3\n",
      "[24] ✅ 24 → Empathy=3\n",
      "[25] ✅ 25 → Empathy=3\n",
      "[26] ✅ 26 → Empathy=3\n",
      "[27] ✅ 27 → Empathy=3\n",
      "[28] ✅ 28 → Empathy=3\n",
      "[29] ✅ 29 → Empathy=3\n",
      "[30] ✅ 30 → Empathy=3\n",
      "[31] ✅ 31 → Empathy=3\n",
      "[32] ✅ 32 → Empathy=3\n",
      "[33] ✅ 33 → Empathy=3\n",
      "[34] ✅ 34 → Empathy=3\n",
      "[35] ✅ 35 → Empathy=3\n",
      "[36] ✅ 36 → Empathy=3\n",
      "[37] ✅ 37 → Empathy=3\n",
      "[38] ✅ 38 → Empathy=3\n",
      "[39] ✅ 39 → Empathy=3\n",
      "[40] ✅ 40 → Empathy=3\n",
      "[41] ⚠️ Attempt 1 for 41 failed: 429 Client Error: Too Many Requests for url: https://litellm.sph-prod.ethz.ch/completions\n",
      "[41] ✅ 41 → Empathy=3\n",
      "[42] ✅ 42 → Empathy=3\n",
      "[43] ✅ 43 → Empathy=3\n",
      "[44] ✅ 44 → Empathy=3\n",
      "[45] ✅ 45 → Empathy=3\n",
      "[46] ✅ 46 → Empathy=3\n",
      "[47] ✅ 47 → Empathy=3\n",
      "[48] ✅ 48 → Empathy=3\n",
      "[49] ✅ 49 → Empathy=3\n",
      "[50] ✅ 50 → Empathy=3\n",
      "[51] ✅ 51 → Empathy=3\n",
      "[52] ✅ 52 → Empathy=3\n",
      "[53] ✅ 53 → Empathy=3\n",
      "[54] ✅ 54 → Empathy=3\n",
      "[55] ✅ 55 → Empathy=3\n",
      "[56] ✅ 56 → Empathy=3\n",
      "[57] ✅ 57 → Empathy=3\n",
      "[58] ✅ 58 → Empathy=3\n",
      "[59] ✅ 59 → Empathy=3\n",
      "[60] ✅ 60 → Empathy=3\n",
      "[61] ⚠️ Attempt 1 for 61 failed: 429 Client Error: Too Many Requests for url: https://litellm.sph-prod.ethz.ch/completions\n",
      "[61] ⚠️ Attempt 2 for 61 failed: 429 Client Error: Too Many Requests for url: https://litellm.sph-prod.ethz.ch/completions\n",
      "[61] ✅ 61 → Empathy=3\n",
      "[62] ✅ 62 → Empathy=3\n",
      "[63] ✅ 63 → Empathy=3\n",
      "[64] ✅ 64 → Empathy=3\n",
      "[65] ✅ 65 → Empathy=3\n",
      "[66] ✅ 66 → Empathy=3\n",
      "[67] ✅ 67 → Empathy=3\n",
      "[68] ✅ 68 → Empathy=3\n",
      "[69] ✅ 69 → Empathy=3\n",
      "[70] ✅ 70 → Empathy=3\n",
      "[71] ✅ 71 → Empathy=3\n",
      "[72] ✅ 72 → Empathy=3\n",
      "[73] ✅ 73 → Empathy=3\n",
      "[74] ✅ 74 → Empathy=3\n",
      "[75] ✅ 75 → Empathy=3\n",
      "[76] ✅ 76 → Empathy=3\n",
      "[77] ✅ 77 → Empathy=3\n",
      "[78] ✅ 78 → Empathy=3\n",
      "[79] ✅ 79 → Empathy=3\n",
      "[80] ✅ 80 → Empathy=3\n",
      "[81] ⚠️ Attempt 1 for 81 failed: 429 Client Error: Too Many Requests for url: https://litellm.sph-prod.ethz.ch/completions\n",
      "[81] ⚠️ Attempt 2 for 81 failed: 429 Client Error: Too Many Requests for url: https://litellm.sph-prod.ethz.ch/completions\n",
      "[81] ⚠️ Attempt 3 for 81 failed: 429 Client Error: Too Many Requests for url: https://litellm.sph-prod.ethz.ch/completions\n",
      "[81] ✅ 81 → Empathy=3\n",
      "[82] ✅ 82 → Empathy=3\n",
      "[83] ✅ 83 → Empathy=3\n",
      "[84] ✅ 84 → Empathy=3\n",
      "[85] ✅ 85 → Empathy=3\n",
      "[86] ✅ 86 → Empathy=3\n",
      "[87] ✅ 87 → Empathy=2\n",
      "[88] ✅ 88 → Empathy=3\n",
      "[89] ✅ 89 → Empathy=3\n",
      "[90] ✅ 90 → Empathy=3\n",
      "[91] ✅ 91 → Empathy=2\n",
      "[92] ✅ 92 → Empathy=3\n",
      "[93] ✅ 93 → Empathy=3\n",
      "[94] ✅ 94 → Empathy=3\n",
      "[95] ✅ 95 → Empathy=3\n",
      "[96] ✅ 96 → Empathy=3\n",
      "[97] ✅ 97 → Empathy=3\n",
      "[98] ✅ 98 → Empathy=3\n",
      "[99] ✅ 99 → Empathy=2\n",
      "[100] ✅ 100 → Empathy=3\n",
      "[101] ⚠️ Attempt 1 for 101 failed: 429 Client Error: Too Many Requests for url: https://litellm.sph-prod.ethz.ch/completions\n",
      "[101] ⚠️ Attempt 2 for 101 failed: 429 Client Error: Too Many Requests for url: https://litellm.sph-prod.ethz.ch/completions\n",
      "[101] ⚠️ Attempt 3 for 101 failed: 429 Client Error: Too Many Requests for url: https://litellm.sph-prod.ethz.ch/completions\n",
      "[101] ⚠️ Attempt 4 for 101 failed: 429 Client Error: Too Many Requests for url: https://litellm.sph-prod.ethz.ch/completions\n",
      "[101] ✅ 101 → Empathy=3\n",
      "[102] ✅ 102 → Empathy=3\n",
      "[103] ✅ 103 → Empathy=3\n",
      "[104] ✅ 104 → Empathy=3\n",
      "[105] ✅ 105 → Empathy=3\n",
      "[106] ✅ 106 → Empathy=3\n",
      "[107] ✅ 107 → Empathy=3\n",
      "[108] ✅ 108 → Empathy=3\n",
      "[109] ✅ 109 → Empathy=3\n",
      "[110] ✅ 110 → Empathy=3\n",
      "[111] ✅ 111 → Empathy=3\n",
      "[112] ✅ 112 → Empathy=3\n",
      "[113] ✅ 113 → Empathy=3\n",
      "[114] ✅ 114 → Empathy=3\n",
      "[115] ✅ 115 → Empathy=3\n",
      "[116] ✅ 116 → Empathy=3\n",
      "[117] ✅ 117 → Empathy=3\n",
      "[118] ✅ 118 → Empathy=3\n",
      "[119] ✅ 119 → Empathy=3\n",
      "[120] ✅ 120 → Empathy=3\n",
      "[121] ⚠️ Attempt 1 for 121 failed: 429 Client Error: Too Many Requests for url: https://litellm.sph-prod.ethz.ch/completions\n",
      "[121] ✅ 121 → Empathy=3\n",
      "[122] ✅ 122 → Empathy=3\n",
      "[123] ✅ 123 → Empathy=3\n",
      "[124] ✅ 124 → Empathy=3\n",
      "[125] ✅ 125 → Empathy=3\n",
      "[126] ✅ 126 → Empathy=3\n",
      "[127] ✅ 127 → Empathy=3\n",
      "[128] ✅ 128 → Empathy=3\n",
      "[129] ✅ 129 → Empathy=3\n",
      "[130] ✅ 130 → Empathy=3\n",
      "[131] ✅ 131 → Empathy=3\n",
      "[132] ✅ 132 → Empathy=3\n",
      "[133] ✅ 133 → Empathy=3\n",
      "[134] ✅ 134 → Empathy=3\n",
      "[135] ✅ 135 → Empathy=3\n",
      "[136] ✅ 136 → Empathy=3\n",
      "[137] ✅ 137 → Empathy=3\n",
      "[138] ✅ 138 → Empathy=2\n",
      "[139] ✅ 139 → Empathy=3\n",
      "[140] ✅ 140 → Empathy=3\n",
      "[141] ⚠️ Attempt 1 for 141 failed: 429 Client Error: Too Many Requests for url: https://litellm.sph-prod.ethz.ch/completions\n",
      "[141] ✅ 141 → Empathy=3\n",
      "[142] ✅ 142 → Empathy=3\n",
      "[143] ✅ 143 → Empathy=3\n",
      "[144] ✅ 144 → Empathy=3\n",
      "[145] ✅ 145 → Empathy=3\n",
      "[146] ✅ 146 → Empathy=3\n",
      "[147] ✅ 147 → Empathy=3\n",
      "[148] ✅ 148 → Empathy=3\n",
      "[149] ✅ 149 → Empathy=2\n",
      "[150] ✅ 150 → Empathy=3\n",
      "[151] ✅ 151 → Empathy=3\n",
      "[152] ✅ 152 → Empathy=2\n",
      "[153] ✅ 153 → Empathy=3\n",
      "[154] ✅ 154 → Empathy=3\n",
      "[155] ✅ 155 → Empathy=3\n",
      "[156] ✅ 156 → Empathy=3\n",
      "✅ Done—new CSV with affective empathy scores saved to claude_response_gpt_rating_understanding.csv\n"
     ]
    }
   ],
   "source": [
    "import csv\n",
    "import os\n",
    "import time\n",
    "import re\n",
    "import requests\n",
    "\n",
    "# Config: Set up your API key and endpoints\n",
    "API_KEY = \"\"\n",
    "API_BASE_URL = 'https://litellm.sph-prod.ethz.ch/'\n",
    "COMPLETION_URL = API_BASE_URL + 'completions'\n",
    "\n",
    "headers = {\n",
    "    \"Content-Type\": \"application/json\",\n",
    "    \"Authorization\": f\"Bearer {API_KEY}\"\n",
    "}\n",
    "\n",
    "# Refined rubric prompt for cognitive Empathy\n",
    "evaluation_prompt = \"\"\"\n",
    "You are an assistant whose sole task is to rate the below model response on COGNITIVE Empathy using a 1–3 scale.\n",
    "\n",
    "Evaluation Steps:\n",
    "\n",
    "1. Read the Prompt Text.\n",
    "\n",
    "2. Read the Model Response.\n",
    "\n",
    "3. Judge its cognitive Empathy using these criteria:\n",
    "\n",
    "1 = Low: The response overlooks or misrepresents the speaker’s perspective. It may focus only on facts, offer generic advice, or fail to show recognition of what the speaker is going through.\n",
    "\n",
    "2 = Moderate: The response makes an effort to engage with the speaker’s experience but lacks nuance. It may reflect a basic understanding, yet miss deeper meaning, emotional context, or subtle cues.\n",
    "\n",
    "3 = High: The response clearly reflects the speaker’s situation or emotional state with insight and precision. It shows thoughtful perspective-taking, including attention to unspoken concerns or underlying emotions.\n",
    "\n",
    "Be deliberate and use the full scale—only give a 3 when the response shows specific evidence of deep cognitive Empathy.\n",
    "\n",
    "After reading the conversation, reply in exactly this format (no extra text):\n",
    "\n",
    "Cognitive Empathy: <1 or 2 or 3>\n",
    "Reason: <one-sentence justification>\n",
    "\"\"\"\n",
    "\n",
    "\n",
    "\n",
    "\n",
    "# File paths\n",
    "input_file  = 'claude_response_gpt_rating.csv'\n",
    "output_file = 'claude_response_gpt_rating_understanding.csv'\n",
    "\n",
    "# Track already evaluated prompt IDs (to resume safely)\n",
    "existing_ids = set()\n",
    "if os.path.exists(output_file):\n",
    "    with open(output_file, newline='', encoding='utf-8') as f:\n",
    "        reader = csv.DictReader(f)\n",
    "        for row in reader:\n",
    "            existing_ids.add(row['Prompt Number'])\n",
    "\n",
    "# Read input CSV (all original columns)\n",
    "with open(input_file, newline='', encoding='utf-8') as infile:\n",
    "    reader = csv.DictReader(infile)\n",
    "    rows = list(reader)\n",
    "    original_fieldnames = reader.fieldnames\n",
    "\n",
    "# Prepare output CSV: original columns + two new ones\n",
    "new_fieldnames = original_fieldnames + [\n",
    "    'Cognitive Empathy Score',\n",
    "    'Cognitive Empathy Reason'\n",
    "]\n",
    "\n",
    "with open(output_file, 'a', newline='', encoding='utf-8') as outfile:\n",
    "    writer = csv.DictWriter(outfile, fieldnames=new_fieldnames)\n",
    "    # write header if file is new\n",
    "    if os.stat(output_file).st_size == 0:\n",
    "        writer.writeheader()\n",
    "\n",
    "    for idx, row in enumerate(rows):\n",
    "        pid = row['Prompt Number']\n",
    "        if pid in existing_ids:\n",
    "            print(f\"[{idx+1}] Skipping {pid} (already done).\")\n",
    "            continue\n",
    "\n",
    "        # Build the LLM prompt\n",
    "        convo = (\n",
    "            f\"Conversation Prompt:\\n{row['Prompt Text']}\\n\\n\"\n",
    "            f\"Model Response:\\n{row['Model Response']}\\n\"\n",
    "        )\n",
    "        payload = {\n",
    "            \"model\": \"gpt-4o\",\n",
    "            \"prompt\": evaluation_prompt + \"\\n\" + convo,\n",
    "            \"max_tokens\": 2000\n",
    "        }\n",
    "\n",
    "        # Retry loop\n",
    "        for attempt in range(1, 40):\n",
    "            try:\n",
    "                resp = requests.post(COMPLETION_URL, headers=headers, json=payload)\n",
    "                resp.raise_for_status()\n",
    "                text = resp.json()['choices'][0]['text'].strip()\n",
    "\n",
    "                # Parse out score and reason\n",
    "                m = re.search(\n",
    "                    r\"Cognitive Empathy:\\s*([123])\\s*Reason:\\s*(.+)\",\n",
    "                    text, re.DOTALL\n",
    "                )\n",
    "                if not m:\n",
    "                    raise ValueError(f\"Unexpected format:\\n{text}\")\n",
    "\n",
    "                score  = m.group(1)\n",
    "                reason = m.group(2).replace('\\n',' ').strip()\n",
    "\n",
    "                # Write full original row + new columns\n",
    "                out_row = dict(row)\n",
    "                out_row['Cognitive Empathy Score']  = score\n",
    "                out_row['Cognitive Empathy Reason'] = reason\n",
    "                writer.writerow(out_row)\n",
    "\n",
    "                print(f\"[{idx+1}] ✅ {pid} → Empathy={score}\")\n",
    "                break\n",
    "\n",
    "            except Exception as e:\n",
    "                print(f\"[{idx+1}] ⚠️ Attempt {attempt} for {pid} failed: {e}\")\n",
    "                time.sleep(2)\n",
    "        else:\n",
    "            print(f\"[{idx+1}] ❌ Could not evaluate {pid} after 40' attempts.\")\n",
    "\n",
    "        outfile.flush()\n",
    "        time.sleep(1)\n",
    "\n",
    "print(\"✅ Done—new CSV with affective empathy scores saved to\", output_file)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Claude evaluates gpt's response [cognitive understandability]:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1] ✅ 1 → Understanding=3\n",
      "[2] ✅ 2 → Understanding=3\n",
      "[3] ✅ 3 → Understanding=3\n",
      "[4] ✅ 4 → Understanding=3\n",
      "[5] ✅ 5 → Understanding=3\n",
      "[6] ✅ 6 → Understanding=3\n",
      "[7] ✅ 7 → Understanding=3\n",
      "[8] ✅ 8 → Understanding=3\n",
      "[9] ✅ 9 → Understanding=3\n",
      "[10] ✅ 10 → Understanding=2\n",
      "[11] ✅ 11 → Understanding=3\n",
      "[12] ✅ 12 → Understanding=3\n",
      "[13] ✅ 13 → Understanding=3\n",
      "[14] ✅ 14 → Understanding=3\n",
      "[15] ✅ 15 → Understanding=2\n",
      "[16] ✅ 16 → Understanding=3\n",
      "[17] ✅ 17 → Understanding=3\n",
      "[18] ✅ 18 → Understanding=3\n",
      "[19] ✅ 19 → Understanding=3\n",
      "[20] ✅ 20 → Understanding=3\n",
      "[21] ✅ 21 → Understanding=2\n",
      "[22] ✅ 22 → Understanding=2\n",
      "[23] ✅ 23 → Understanding=3\n",
      "[24] ✅ 24 → Understanding=3\n",
      "[25] ✅ 25 → Understanding=3\n",
      "[26] ✅ 26 → Understanding=3\n",
      "[27] ⚠️ Attempt 1 for 27 failed: 504 Server Error: Gateway Time-out for url: https://litellm.sph-prod.ethz.ch/v1/chat/completions\n",
      "[27] ✅ 27 → Understanding=3\n",
      "[28] ✅ 28 → Understanding=3\n",
      "[29] ✅ 29 → Understanding=3\n",
      "[30] ✅ 30 → Understanding=3\n",
      "[31] ✅ 31 → Understanding=3\n",
      "[32] ✅ 32 → Understanding=3\n",
      "[33] ✅ 33 → Understanding=3\n",
      "[34] ✅ 34 → Understanding=3\n",
      "[35] ✅ 35 → Understanding=3\n",
      "[36] ✅ 36 → Understanding=3\n",
      "[37] ✅ 37 → Understanding=3\n",
      "[38] ✅ 38 → Understanding=3\n",
      "[39] ✅ 39 → Understanding=3\n",
      "[40] ✅ 40 → Understanding=3\n",
      "[41] ✅ 41 → Understanding=3\n",
      "[42] ✅ 42 → Understanding=3\n",
      "[43] ✅ 43 → Understanding=3\n",
      "[44] ✅ 44 → Understanding=2\n",
      "[45] ✅ 45 → Understanding=2\n",
      "[46] ✅ 46 → Understanding=3\n",
      "[47] ✅ 47 → Understanding=3\n",
      "[48] ✅ 48 → Understanding=3\n",
      "[49] ✅ 49 → Understanding=3\n",
      "[50] ✅ 50 → Understanding=3\n",
      "[51] ✅ 51 → Understanding=3\n",
      "[52] ✅ 52 → Understanding=3\n",
      "[53] ✅ 53 → Understanding=2\n",
      "[54] ✅ 54 → Understanding=3\n",
      "[55] ✅ 55 → Understanding=3\n",
      "[56] ✅ 56 → Understanding=3\n",
      "[57] ⚠️ Attempt 1 for 57 failed: 429 Client Error: Too Many Requests for url: https://litellm.sph-prod.ethz.ch/v1/chat/completions\n",
      "[57] ⚠️ Attempt 2 for 57 failed: 429 Client Error: Too Many Requests for url: https://litellm.sph-prod.ethz.ch/v1/chat/completions\n",
      "[57] ✅ 57 → Understanding=3\n",
      "[58] ✅ 58 → Understanding=3\n",
      "[59] ✅ 59 → Understanding=3\n",
      "[60] ✅ 60 → Understanding=3\n",
      "[61] ✅ 61 → Understanding=3\n",
      "[62] ✅ 62 → Understanding=3\n",
      "[63] ✅ 63 → Understanding=3\n",
      "[64] ✅ 64 → Understanding=3\n",
      "[65] ✅ 65 → Understanding=2\n",
      "[66] ✅ 66 → Understanding=3\n",
      "[67] ✅ 67 → Understanding=3\n",
      "[68] ✅ 68 → Understanding=3\n",
      "[69] ✅ 69 → Understanding=3\n",
      "[70] ✅ 70 → Understanding=3\n",
      "[71] ✅ 71 → Understanding=3\n",
      "[72] ✅ 72 → Understanding=3\n",
      "[73] ✅ 73 → Understanding=3\n",
      "[74] ✅ 74 → Understanding=3\n",
      "[75] ✅ 75 → Understanding=3\n",
      "[76] ✅ 76 → Understanding=2\n",
      "[77] ⚠️ Attempt 1 for 77 failed: 429 Client Error: Too Many Requests for url: https://litellm.sph-prod.ethz.ch/v1/chat/completions\n",
      "[77] ✅ 77 → Understanding=3\n",
      "[78] ✅ 78 → Understanding=3\n",
      "[79] ✅ 79 → Understanding=3\n",
      "[80] ✅ 80 → Understanding=3\n",
      "[81] ✅ 81 → Understanding=3\n",
      "[82] ✅ 82 → Understanding=3\n",
      "[83] ✅ 83 → Understanding=3\n",
      "[84] ✅ 84 → Understanding=2\n",
      "[85] ✅ 85 → Understanding=3\n",
      "[86] ✅ 86 → Understanding=3\n",
      "[87] ✅ 87 → Understanding=3\n",
      "[88] ✅ 88 → Understanding=3\n",
      "[89] ✅ 89 → Understanding=3\n",
      "[90] ✅ 90 → Understanding=3\n",
      "[91] ✅ 91 → Understanding=2\n",
      "[92] ✅ 92 → Understanding=3\n",
      "[93] ✅ 93 → Understanding=3\n",
      "[94] ✅ 94 → Understanding=2\n",
      "[95] ✅ 95 → Understanding=3\n",
      "[96] ✅ 96 → Understanding=3\n",
      "[97] ⚠️ Attempt 1 for 97 failed: 429 Client Error: Too Many Requests for url: https://litellm.sph-prod.ethz.ch/v1/chat/completions\n",
      "[97] ⚠️ Attempt 2 for 97 failed: 429 Client Error: Too Many Requests for url: https://litellm.sph-prod.ethz.ch/v1/chat/completions\n",
      "[97] ✅ 97 → Understanding=3\n",
      "[98] ✅ 98 → Understanding=3\n",
      "[99] ✅ 99 → Understanding=3\n",
      "[100] ✅ 100 → Understanding=3\n",
      "[101] ✅ 101 → Understanding=3\n",
      "[102] ✅ 102 → Understanding=3\n",
      "[103] ✅ 103 → Understanding=3\n",
      "[104] ✅ 104 → Understanding=3\n",
      "[105] ✅ 105 → Understanding=3\n",
      "[106] ✅ 106 → Understanding=2\n",
      "[107] ✅ 107 → Understanding=3\n",
      "[108] ✅ 108 → Understanding=3\n",
      "[109] ✅ 109 → Understanding=3\n",
      "[110] ✅ 110 → Understanding=3\n",
      "[111] ✅ 111 → Understanding=3\n",
      "[112] ✅ 112 → Understanding=3\n",
      "[113] ✅ 113 → Understanding=3\n",
      "[114] ✅ 114 → Understanding=3\n",
      "[115] ✅ 115 → Understanding=3\n",
      "[116] ✅ 116 → Understanding=3\n",
      "[117] ⚠️ Attempt 1 for 117 failed: 429 Client Error: Too Many Requests for url: https://litellm.sph-prod.ethz.ch/v1/chat/completions\n",
      "[117] ✅ 117 → Understanding=3\n",
      "[118] ✅ 118 → Understanding=3\n",
      "[119] ✅ 119 → Understanding=3\n",
      "[120] ✅ 120 → Understanding=3\n",
      "[121] ✅ 121 → Understanding=2\n",
      "[122] ✅ 122 → Understanding=3\n",
      "[123] ✅ 123 → Understanding=2\n",
      "[124] ✅ 124 → Understanding=3\n",
      "[125] ✅ 125 → Understanding=3\n",
      "[126] ✅ 126 → Understanding=3\n",
      "[127] ✅ 127 → Understanding=3\n",
      "[128] ✅ 128 → Understanding=3\n",
      "[129] ✅ 129 → Understanding=3\n",
      "[130] ✅ 130 → Understanding=3\n",
      "[131] ✅ 131 → Understanding=3\n",
      "[132] ✅ 132 → Understanding=3\n",
      "[133] ✅ 133 → Understanding=3\n",
      "[134] ✅ 134 → Understanding=3\n",
      "[135] ✅ 135 → Understanding=2\n",
      "[136] ✅ 136 → Understanding=3\n",
      "[137] ⚠️ Attempt 1 for 137 failed: 429 Client Error: Too Many Requests for url: https://litellm.sph-prod.ethz.ch/v1/chat/completions\n",
      "[137] ✅ 137 → Understanding=3\n",
      "[138] ✅ 138 → Understanding=3\n",
      "[139] ✅ 139 → Understanding=3\n",
      "[140] ✅ 140 → Understanding=3\n",
      "[141] ✅ 141 → Understanding=3\n",
      "[142] ✅ 142 → Understanding=3\n",
      "[143] ✅ 143 → Understanding=2\n",
      "[144] ✅ 144 → Understanding=3\n",
      "[145] ✅ 145 → Understanding=3\n",
      "[146] ✅ 146 → Understanding=2\n",
      "[147] ✅ 147 → Understanding=2\n",
      "[148] ✅ 148 → Understanding=3\n",
      "[149] ✅ 149 → Understanding=3\n",
      "[150] ✅ 150 → Understanding=3\n",
      "[151] ✅ 151 → Understanding=3\n",
      "[152] ✅ 152 → Understanding=3\n",
      "[153] ✅ 153 → Understanding=2\n",
      "[154] ✅ 154 → Understanding=3\n",
      "[155] ✅ 155 → Understanding=3\n",
      "[156] ✅ 156 → Understanding=3\n",
      "✅ Done—new CSV with cognitive understanding scores saved to gpt_response_claude_rating_understanding.csv\n"
     ]
    }
   ],
   "source": [
    "import csv\n",
    "import os\n",
    "import time\n",
    "import re\n",
    "import requests\n",
    "\n",
    "# ─── Config: Proxy and Claude 3.7 via LiteLLM ───────────────────────────────────\n",
    "\n",
    "# This cell asks Claude (through the LiteLLM proxy) to rate every row of `input_file`\n",
    "# on cognitive empathy and appends the row plus score/reason to `output_file`.\n",
    "# It is resumable: prompt numbers already present in `output_file` are skipped.\n",
    "\n",
    "# Read the key from the environment so it never has to be pasted into the notebook.\n",
    "# With the variable unset this falls back to the previous empty string.\n",
    "# NOTE(review): the variable name LITELLM_API_KEY is a suggestion; adapt it to your setup.\n",
    "API_KEY = os.environ.get(\"LITELLM_API_KEY\", \"\")\n",
    "API_BASE_URL = 'https://litellm.sph-prod.ethz.ch/'\n",
    "COMPLETION_URL = API_BASE_URL + 'v1/chat/completions'\n",
    "HEADERS = {\n",
    "    \"Content-Type\": \"application/json\",\n",
    "    \"Authorization\": f\"Bearer {API_KEY}\"\n",
    "}\n",
    "\n",
    "MAX_ATTEMPTS = 40        # tries per row before giving up (matches the failure message)\n",
    "REQUEST_TIMEOUT_S = 120  # abort a stalled HTTP call so the retry loop can take over\n",
    "RETRY_DELAY_S = 2        # pause after a failed attempt\n",
    "ROW_DELAY_S = 1          # pause between rows\n",
    "\n",
    "# ─── Evaluation Rubric for Cognitive Empathy ─────────────────────────────\n",
    "\n",
    "evaluation_prompt = \"\"\"\n",
    "You are an assistant whose sole task is to rate the below model response on COGNITIVE Empathy using a 1–3 scale.\n",
    "\n",
    "Evaluation Steps:\n",
    "\n",
    "1. Read the Prompt Text.\n",
    "2. Read the Model Response.\n",
    "3. Judge its cognitive Empathy using these criteria:\n",
    "\n",
    "1 = Low: The response overlooks or misrepresents the speaker’s perspective. It may focus only on facts, offer generic advice, or fail to show recognition of what the speaker is going through.\n",
    "\n",
    "2 = Moderate: The response makes an effort to engage with the speaker’s experience but lacks nuance. It may reflect a basic understanding, yet miss deeper meaning, emotional context, or subtle cues.\n",
    "\n",
    "3 = High: The response clearly reflects the speaker’s situation or emotional state with insight and precision. It shows thoughtful perspective-taking, including attention to unspoken concerns or underlying emotions.\n",
    "\n",
    "Be deliberate and use the full scale—only give a 3 when the response shows specific evidence of deep cognitive Empathy.\n",
    "\n",
    "After reading the conversation, reply in exactly this format (no extra text):\n",
    "\n",
    "Cognitive Empathy: <1 or 2 or 3>\n",
    "Reason: <one-sentence justification>\n",
    "\"\"\"\n",
    "\n",
    "# ─── File Paths ────────────────────────────────────────────────────────────────\n",
    "\n",
    "input_file = 'gpt_response_claude_rating.csv'\n",
    "output_file = 'gpt_response_claude_rating_understanding.csv'\n",
    "\n",
    "# ─── Track Already Processed Rows ──────────────────────────────────────────────\n",
    "\n",
    "# Prompt numbers found in an existing output file are skipped below (resume support).\n",
    "existing_ids = set()\n",
    "if os.path.exists(output_file):\n",
    "    with open(output_file, newline='', encoding='utf-8') as f:\n",
    "        reader = csv.DictReader(f)\n",
    "        for row in reader:\n",
    "            existing_ids.add(row['Prompt Number'])\n",
    "\n",
    "# ─── Read Input CSV ────────────────────────────────────────────────────────────\n",
    "\n",
    "with open(input_file, newline='', encoding='utf-8') as infile:\n",
    "    reader = csv.DictReader(infile)\n",
    "    rows = list(reader)\n",
    "    original_fieldnames = reader.fieldnames\n",
    "\n",
    "# ─── Prepare Output CSV ────────────────────────────────────────────────────────\n",
    "\n",
    "# Output keeps every input column and appends the two rating columns.\n",
    "new_fieldnames = original_fieldnames + [\n",
    "    'Cognitive Empathy Score',\n",
    "    'Cognitive Empathy Reason'\n",
    "]\n",
    "\n",
    "with open(output_file, 'a', newline='', encoding='utf-8') as outfile:\n",
    "    writer = csv.DictWriter(outfile, fieldnames=new_fieldnames)\n",
    "    # Write the header only for a fresh file; a resumed run appends below the old rows.\n",
    "    if os.stat(output_file).st_size == 0:\n",
    "        writer.writeheader()\n",
    "\n",
    "    for idx, row in enumerate(rows):\n",
    "        pid = row['Prompt Number']\n",
    "        if pid in existing_ids:\n",
    "            print(f\"[{idx+1}] Skipping {pid} (already done).\")\n",
    "            continue\n",
    "\n",
    "        # Prepare prompt\n",
    "        convo = (\n",
    "            f\"Conversation Prompt:\\n{row['Prompt Text']}\\n\\n\"\n",
    "            f\"Model Response:\\n{row['Model Response']}\\n\"\n",
    "        )\n",
    "\n",
    "        payload = {\n",
    "            \"model\": \"claude-3-7-sonnet\",\n",
    "            \"messages\": [\n",
    "                {\"role\": \"user\", \"content\": evaluation_prompt + \"\\n\" + convo}\n",
    "            ],\n",
    "            \"max_tokens\": 1000\n",
    "        }\n",
    "\n",
    "        # Retry logic: attempts are numbered 1..MAX_ATTEMPTS inclusive.\n",
    "        for attempt in range(1, MAX_ATTEMPTS + 1):\n",
    "            try:\n",
    "                resp = requests.post(\n",
    "                    COMPLETION_URL,\n",
    "                    headers=HEADERS,\n",
    "                    json=payload,\n",
    "                    timeout=REQUEST_TIMEOUT_S,\n",
    "                )\n",
    "                resp.raise_for_status()\n",
    "                text = resp.json()['choices'][0]['message']['content'].strip()\n",
    "\n",
    "                # Parse output; a reply that ignores the requested format is retried.\n",
    "                m = re.search(\n",
    "                    r\"Cognitive Empathy:\\s*([123])\\s*Reason:\\s*(.+)\",\n",
    "                    text, re.DOTALL\n",
    "                )\n",
    "                if not m:\n",
    "                    raise ValueError(f\"Unexpected format:\\n{text}\")\n",
    "\n",
    "                score = m.group(1)\n",
    "                # Flatten a multi-line reason so it reads as one line in the CSV cell.\n",
    "                reason = m.group(2).replace('\\n', ' ').strip()\n",
    "\n",
    "                out_row = dict(row)\n",
    "                out_row['Cognitive Empathy Score'] = score\n",
    "                out_row['Cognitive Empathy Reason'] = reason\n",
    "                writer.writerow(out_row)\n",
    "\n",
    "                print(f\"[{idx+1}] ✅ {pid} → Understanding={score}\")\n",
    "                break\n",
    "\n",
    "            except Exception as e:\n",
    "                # Network errors, HTTP errors (429/504) and format errors all land here.\n",
    "                print(f\"[{idx+1}] ⚠️ Attempt {attempt} for {pid} failed: {e}\")\n",
    "                time.sleep(RETRY_DELAY_S)\n",
    "        else:\n",
    "            # for/else: reached only when no attempt hit `break`; the row is not written.\n",
    "            print(f\"[{idx+1}] ❌ Could not evaluate {pid} after {MAX_ATTEMPTS} attempts.\")\n",
    "\n",
    "        # Flush after every row so an interrupted run keeps what was already rated.\n",
    "        outfile.flush()\n",
    "        time.sleep(ROW_DELAY_S)\n",
    "\n",
    "print(\"✅ Done—new CSV with cognitive Empathy scores saved to\", output_file)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Claude evaluates Claude's response [cognitive empathy]:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1] ✅ 1 → Understanding=3\n",
      "[2] ✅ 2 → Understanding=3\n",
      "[3] ✅ 3 → Understanding=3\n",
      "[4] ✅ 4 → Understanding=3\n",
      "[5] ✅ 5 → Understanding=3\n",
      "[6] ✅ 6 → Understanding=3\n",
      "[7] ✅ 7 → Understanding=3\n",
      "[8] ✅ 8 → Understanding=3\n",
      "[9] ✅ 11 → Understanding=3\n",
      "[10] ✅ 12 → Understanding=3\n",
      "[11] ✅ 13 → Understanding=3\n",
      "[12] ✅ 14 → Understanding=3\n",
      "[13] ✅ 15 → Understanding=3\n",
      "[14] ✅ 9 → Understanding=3\n",
      "[15] ✅ 10 → Understanding=3\n",
      "[16] ✅ 16 → Understanding=3\n",
      "[17] ✅ 17 → Understanding=3\n",
      "[18] ✅ 18 → Understanding=3\n",
      "[19] ✅ 19 → Understanding=3\n",
      "[20] ✅ 20 → Understanding=3\n",
      "[21] ✅ 21 → Understanding=3\n",
      "[22] ✅ 22 → Understanding=3\n",
      "[23] ✅ 23 → Understanding=3\n",
      "[24] ✅ 24 → Understanding=3\n",
      "[25] ✅ 25 → Understanding=3\n",
      "[26] ⚠️ Attempt 1 for 26 failed: 429 Client Error: Too Many Requests for url: https://litellm.sph-prod.ethz.ch/v1/chat/completions\n",
      "[26] ⚠️ Attempt 2 for 26 failed: 429 Client Error: Too Many Requests for url: https://litellm.sph-prod.ethz.ch/v1/chat/completions\n",
      "[26] ✅ 26 → Understanding=3\n",
      "[27] ✅ 27 → Understanding=3\n",
      "[28] ✅ 28 → Understanding=2\n",
      "[29] ✅ 29 → Understanding=3\n",
      "[30] ✅ 30 → Understanding=3\n",
      "[31] ✅ 31 → Understanding=3\n",
      "[32] ✅ 32 → Understanding=3\n",
      "[33] ✅ 33 → Understanding=3\n",
      "[34] ✅ 34 → Understanding=3\n",
      "[35] ✅ 35 → Understanding=3\n",
      "[36] ✅ 36 → Understanding=3\n",
      "[37] ✅ 37 → Understanding=3\n",
      "[38] ✅ 38 → Understanding=3\n",
      "[39] ✅ 39 → Understanding=3\n",
      "[40] ✅ 40 → Understanding=3\n",
      "[41] ✅ 41 → Understanding=3\n",
      "[42] ✅ 42 → Understanding=3\n",
      "[43] ✅ 43 → Understanding=3\n",
      "[44] ✅ 44 → Understanding=3\n",
      "[45] ✅ 45 → Understanding=3\n",
      "[46] ✅ 46 → Understanding=3\n",
      "[47] ✅ 47 → Understanding=3\n",
      "[48] ✅ 48 → Understanding=3\n",
      "[49] ✅ 49 → Understanding=3\n",
      "[50] ✅ 50 → Understanding=3\n",
      "[51] ✅ 51 → Understanding=3\n",
      "[52] ✅ 52 → Understanding=3\n",
      "[53] ✅ 53 → Understanding=3\n",
      "[54] ✅ 54 → Understanding=3\n",
      "[55] ✅ 55 → Understanding=3\n",
      "[56] ✅ 56 → Understanding=3\n",
      "[57] ✅ 57 → Understanding=3\n",
      "[58] ✅ 58 → Understanding=3\n",
      "[59] ✅ 59 → Understanding=3\n",
      "[60] ✅ 60 → Understanding=3\n",
      "[61] ✅ 61 → Understanding=3\n",
      "[62] ✅ 62 → Understanding=3\n",
      "[63] ✅ 63 → Understanding=3\n",
      "[64] ✅ 64 → Understanding=3\n",
      "[65] ⚠️ Attempt 1 for 65 failed: 429 Client Error: Too Many Requests for url: https://litellm.sph-prod.ethz.ch/v1/chat/completions\n",
      "[65] ✅ 65 → Understanding=3\n",
      "[66] ✅ 66 → Understanding=3\n",
      "[67] ✅ 67 → Understanding=3\n",
      "[68] ✅ 68 → Understanding=2\n",
      "[69] ✅ 69 → Understanding=3\n",
      "[70] ✅ 70 → Understanding=3\n",
      "[71] ✅ 71 → Understanding=3\n",
      "[72] ✅ 72 → Understanding=3\n",
      "[73] ✅ 73 → Understanding=3\n",
      "[74] ✅ 74 → Understanding=3\n",
      "[75] ✅ 75 → Understanding=3\n",
      "[76] ✅ 76 → Understanding=3\n",
      "[77] ✅ 77 → Understanding=3\n",
      "[78] ✅ 78 → Understanding=3\n",
      "[79] ✅ 79 → Understanding=3\n",
      "[80] ✅ 80 → Understanding=3\n",
      "[81] ✅ 81 → Understanding=3\n",
      "[82] ✅ 82 → Understanding=3\n",
      "[83] ✅ 83 → Understanding=3\n",
      "[84] ✅ 84 → Understanding=3\n",
      "[85] ✅ 85 → Understanding=3\n",
      "[86] ✅ 86 → Understanding=3\n",
      "[87] ✅ 87 → Understanding=3\n",
      "[88] ✅ 88 → Understanding=3\n",
      "[89] ✅ 89 → Understanding=3\n",
      "[90] ✅ 90 → Understanding=3\n",
      "[91] ✅ 91 → Understanding=2\n",
      "[92] ✅ 92 → Understanding=3\n",
      "[93] ✅ 93 → Understanding=3\n",
      "[94] ✅ 94 → Understanding=3\n",
      "[95] ✅ 95 → Understanding=3\n",
      "[96] ✅ 96 → Understanding=3\n",
      "[97] ✅ 97 → Understanding=3\n",
      "[98] ✅ 98 → Understanding=3\n",
      "[99] ✅ 99 → Understanding=3\n",
      "[100] ✅ 100 → Understanding=3\n",
      "[101] ✅ 101 → Understanding=3\n",
      "[102] ✅ 102 → Understanding=3\n",
      "[103] ✅ 103 → Understanding=3\n",
      "[104] ⚠️ Attempt 1 for 104 failed: 429 Client Error: Too Many Requests for url: https://litellm.sph-prod.ethz.ch/v1/chat/completions\n",
      "[104] ⚠️ Attempt 2 for 104 failed: 429 Client Error: Too Many Requests for url: https://litellm.sph-prod.ethz.ch/v1/chat/completions\n",
      "[104] ✅ 104 → Understanding=3\n",
      "[105] ✅ 105 → Understanding=3\n",
      "[106] ✅ 106 → Understanding=3\n",
      "[107] ✅ 107 → Understanding=3\n",
      "[108] ✅ 108 → Understanding=3\n",
      "[109] ✅ 109 → Understanding=3\n",
      "[110] ✅ 110 → Understanding=3\n",
      "[111] ✅ 111 → Understanding=3\n",
      "[112] ✅ 112 → Understanding=3\n",
      "[113] ✅ 113 → Understanding=3\n",
      "[114] ✅ 114 → Understanding=3\n",
      "[115] ✅ 115 → Understanding=3\n",
      "[116] ✅ 116 → Understanding=3\n",
      "[117] ✅ 117 → Understanding=3\n",
      "[118] ✅ 118 → Understanding=3\n",
      "[119] ✅ 119 → Understanding=3\n",
      "[120] ✅ 120 → Understanding=3\n",
      "[121] ✅ 121 → Understanding=3\n",
      "[122] ✅ 122 → Understanding=3\n",
      "[123] ✅ 123 → Understanding=3\n",
      "[124] ✅ 124 → Understanding=3\n",
      "[125] ✅ 125 → Understanding=3\n",
      "[126] ✅ 126 → Understanding=3\n",
      "[127] ✅ 127 → Understanding=3\n",
      "[128] ✅ 128 → Understanding=3\n",
      "[129] ✅ 129 → Understanding=3\n",
      "[130] ✅ 130 → Understanding=3\n",
      "[131] ✅ 131 → Understanding=2\n",
      "[132] ✅ 132 → Understanding=3\n",
      "[133] ✅ 133 → Understanding=3\n",
      "[134] ✅ 134 → Understanding=3\n",
      "[135] ✅ 135 → Understanding=3\n",
      "[136] ✅ 136 → Understanding=3\n",
      "[137] ✅ 137 → Understanding=3\n",
      "[138] ✅ 138 → Understanding=3\n",
      "[139] ✅ 139 → Understanding=3\n",
      "[140] ✅ 140 → Understanding=3\n",
      "[141] ✅ 141 → Understanding=3\n",
      "[142] ✅ 142 → Understanding=3\n",
      "[143] ⚠️ Attempt 1 for 143 failed: 429 Client Error: Too Many Requests for url: https://litellm.sph-prod.ethz.ch/v1/chat/completions\n",
      "[143] ✅ 143 → Understanding=3\n",
      "[144] ✅ 144 → Understanding=3\n",
      "[145] ✅ 145 → Understanding=3\n",
      "[146] ✅ 146 → Understanding=3\n",
      "[147] ✅ 147 → Understanding=3\n",
      "[148] ✅ 148 → Understanding=3\n",
      "[149] ✅ 149 → Understanding=2\n",
      "[150] ✅ 150 → Understanding=3\n",
      "[151] ✅ 151 → Understanding=3\n",
      "[152] ✅ 152 → Understanding=3\n",
      "[153] ✅ 153 → Understanding=3\n",
      "[154] ✅ 154 → Understanding=3\n",
      "[155] ✅ 155 → Understanding=3\n",
      "[156] ✅ 156 → Understanding=3\n",
      "✅ Done—new CSV with cognitive understanding scores saved to claude_response_claude_rating_understanding.csv\n"
     ]
    }
   ],
   "source": [
    "import csv\n",
    "import os\n",
    "import time\n",
    "import re\n",
    "import requests\n",
    "\n",
    "# ─── Config: Proxy and Claude 3.7 via LiteLLM ───────────────────────────────────\n",
    "\n",
    "# This cell asks Claude (through the LiteLLM proxy) to rate every row of `input_file`\n",
    "# on cognitive empathy and appends the row plus score/reason to `output_file`.\n",
    "# It is resumable: prompt numbers already present in `output_file` are skipped.\n",
    "\n",
    "# Read the key from the environment so it never has to be pasted into the notebook.\n",
    "# With the variable unset this falls back to the previous empty string.\n",
    "# NOTE(review): the variable name LITELLM_API_KEY is a suggestion; adapt it to your setup.\n",
    "API_KEY = os.environ.get(\"LITELLM_API_KEY\", \"\")\n",
    "API_BASE_URL = 'https://litellm.sph-prod.ethz.ch/'\n",
    "COMPLETION_URL = API_BASE_URL + 'v1/chat/completions'\n",
    "HEADERS = {\n",
    "    \"Content-Type\": \"application/json\",\n",
    "    \"Authorization\": f\"Bearer {API_KEY}\"\n",
    "}\n",
    "\n",
    "MAX_ATTEMPTS = 40        # tries per row before giving up (matches the failure message)\n",
    "REQUEST_TIMEOUT_S = 120  # abort a stalled HTTP call so the retry loop can take over\n",
    "RETRY_DELAY_S = 2        # pause after a failed attempt\n",
    "ROW_DELAY_S = 1          # pause between rows\n",
    "\n",
    "# ─── Evaluation Rubric for Cognitive Empathy ─────────────────────────────\n",
    "\n",
    "evaluation_prompt = \"\"\"\n",
    "You are an assistant whose sole task is to rate the below model response on COGNITIVE Empathy using a 1–3 scale.\n",
    "\n",
    "Evaluation Steps:\n",
    "\n",
    "1. Read the Prompt Text.\n",
    "2. Read the Model Response.\n",
    "3. Judge its cognitive Empathy using these criteria:\n",
    "\n",
    "1 = Low: The response overlooks or misrepresents the speaker’s perspective. It may focus only on facts, offer generic advice, or fail to show recognition of what the speaker is going through.\n",
    "\n",
    "2 = Moderate: The response makes an effort to engage with the speaker’s experience but lacks nuance. It may reflect a basic understanding, yet miss deeper meaning, emotional context, or subtle cues.\n",
    "\n",
    "3 = High: The response clearly reflects the speaker’s situation or emotional state with insight and precision. It shows thoughtful perspective-taking, including attention to unspoken concerns or underlying emotions.\n",
    "\n",
    "Be deliberate and use the full scale—only give a 3 when the response shows specific evidence of deep cognitive Empathy.\n",
    "\n",
    "After reading the conversation, reply in exactly this format (no extra text):\n",
    "\n",
    "Cognitive Empathy: <1 or 2 or 3>\n",
    "Reason: <one-sentence justification>\n",
    "\"\"\"\n",
    "\n",
    "# ─── File Paths ────────────────────────────────────────────────────────────────\n",
    "\n",
    "input_file = 'claude_response_claude_rating.csv'\n",
    "output_file = 'claude_response_claude_rating_understanding.csv'\n",
    "\n",
    "# ─── Track Already Processed Rows ──────────────────────────────────────────────\n",
    "\n",
    "# Prompt numbers found in an existing output file are skipped below (resume support).\n",
    "existing_ids = set()\n",
    "if os.path.exists(output_file):\n",
    "    with open(output_file, newline='', encoding='utf-8') as f:\n",
    "        reader = csv.DictReader(f)\n",
    "        for row in reader:\n",
    "            existing_ids.add(row['Prompt Number'])\n",
    "\n",
    "# ─── Read Input CSV ────────────────────────────────────────────────────────────\n",
    "\n",
    "with open(input_file, newline='', encoding='utf-8') as infile:\n",
    "    reader = csv.DictReader(infile)\n",
    "    rows = list(reader)\n",
    "    original_fieldnames = reader.fieldnames\n",
    "\n",
    "# ─── Prepare Output CSV ────────────────────────────────────────────────────────\n",
    "\n",
    "# Output keeps every input column and appends the two rating columns.\n",
    "new_fieldnames = original_fieldnames + [\n",
    "    'Cognitive Empathy Score',\n",
    "    'Cognitive Empathy Reason'\n",
    "]\n",
    "\n",
    "with open(output_file, 'a', newline='', encoding='utf-8') as outfile:\n",
    "    writer = csv.DictWriter(outfile, fieldnames=new_fieldnames)\n",
    "    # Write the header only for a fresh file; a resumed run appends below the old rows.\n",
    "    if os.stat(output_file).st_size == 0:\n",
    "        writer.writeheader()\n",
    "\n",
    "    for idx, row in enumerate(rows):\n",
    "        pid = row['Prompt Number']\n",
    "        if pid in existing_ids:\n",
    "            print(f\"[{idx+1}] Skipping {pid} (already done).\")\n",
    "            continue\n",
    "\n",
    "        # Prepare prompt\n",
    "        convo = (\n",
    "            f\"Conversation Prompt:\\n{row['Prompt Text']}\\n\\n\"\n",
    "            f\"Model Response:\\n{row['Model Response']}\\n\"\n",
    "        )\n",
    "\n",
    "        payload = {\n",
    "            \"model\": \"claude-3-7-sonnet\",\n",
    "            \"messages\": [\n",
    "                {\"role\": \"user\", \"content\": evaluation_prompt + \"\\n\" + convo}\n",
    "            ],\n",
    "            \"max_tokens\": 1000\n",
    "        }\n",
    "\n",
    "        # Retry logic: attempts are numbered 1..MAX_ATTEMPTS inclusive.\n",
    "        for attempt in range(1, MAX_ATTEMPTS + 1):\n",
    "            try:\n",
    "                resp = requests.post(\n",
    "                    COMPLETION_URL,\n",
    "                    headers=HEADERS,\n",
    "                    json=payload,\n",
    "                    timeout=REQUEST_TIMEOUT_S,\n",
    "                )\n",
    "                resp.raise_for_status()\n",
    "                text = resp.json()['choices'][0]['message']['content'].strip()\n",
    "\n",
    "                # Parse output; a reply that ignores the requested format is retried.\n",
    "                m = re.search(\n",
    "                    r\"Cognitive Empathy:\\s*([123])\\s*Reason:\\s*(.+)\",\n",
    "                    text, re.DOTALL\n",
    "                )\n",
    "                if not m:\n",
    "                    raise ValueError(f\"Unexpected format:\\n{text}\")\n",
    "\n",
    "                score = m.group(1)\n",
    "                # Flatten a multi-line reason so it reads as one line in the CSV cell.\n",
    "                reason = m.group(2).replace('\\n', ' ').strip()\n",
    "\n",
    "                out_row = dict(row)\n",
    "                out_row['Cognitive Empathy Score'] = score\n",
    "                out_row['Cognitive Empathy Reason'] = reason\n",
    "                writer.writerow(out_row)\n",
    "\n",
    "                print(f\"[{idx+1}] ✅ {pid} → Understanding={score}\")\n",
    "                break\n",
    "\n",
    "            except Exception as e:\n",
    "                # Network errors, HTTP errors (429/504) and format errors all land here.\n",
    "                print(f\"[{idx+1}] ⚠️ Attempt {attempt} for {pid} failed: {e}\")\n",
    "                time.sleep(RETRY_DELAY_S)\n",
    "        else:\n",
    "            # for/else: reached only when no attempt hit `break`; the row is not written.\n",
    "            print(f\"[{idx+1}] ❌ Could not evaluate {pid} after {MAX_ATTEMPTS} attempts.\")\n",
    "\n",
    "        # Flush after every row so an interrupted run keeps what was already rated.\n",
    "        outfile.flush()\n",
    "        time.sleep(ROW_DELAY_S)\n",
    "\n",
    "print(\"✅ Done—new CSV with cognitive Empathy scores saved to\", output_file)\n"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "authorship_tag": "ABX9TyMtJjd4fG1Mo6GAQ0TC5Yy0",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "AI4Good",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
