{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c8fe2152",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import ast\n",
    "import numpy as np\n",
    "import json\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "import re\n",
    "\n",
    "df = pd.read_csv('../data/all.csv')\n",
    "df[['audioId', 'annotator', 'human', \"emotions\"]]\n",
    "df = df[df['human'] == False]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a02a3356",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import ast\n",
    "from scipy.stats import iqr\n",
    "\n",
    "# Parse emotion dicts\n",
    "df = df.copy()\n",
    "df['emotions_dict'] = df['emotions'].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)\n",
    "all_emotions = sorted({k for d in df['emotions_dict'] for k in d.keys()})\n",
    "\n",
    "model_stats = []\n",
    "for model, group in df.groupby('annotator'):\n",
    "    # Build emotion matrix: rows=annotations, cols=emotions\n",
    "    emotion_matrix = []\n",
    "    n_nonzero_per_ann = []\n",
    "    for d in group['emotions_dict']:\n",
    "        row = [d.get(e, np.nan) for e in all_emotions]\n",
    "        emotion_matrix.append(row)\n",
    "        n_nonzero_per_ann.append(np.sum(np.array(row) > 0))\n",
    "    emotion_matrix = np.array(emotion_matrix, dtype=np.float64)\n",
    "    # Per-emotion stats\n",
    "    means = np.nanmean(emotion_matrix, axis=0)\n",
    "    medians = np.nanmedian(emotion_matrix, axis=0)\n",
    "    iqrs = iqr(emotion_matrix, axis=0, nan_policy='omit')\n",
    "    # Outlier detection (per emotion, 1.5*IQR rule)\n",
    "    outlier_counts = []\n",
    "    for i, e in enumerate(all_emotions):\n",
    "        col = emotion_matrix[:, i]\n",
    "        q1 = np.nanpercentile(col, 25)\n",
    "        q3 = np.nanpercentile(col, 75)\n",
    "        iqr_val = q3 - q1\n",
    "        lower = q1 - 1.5 * iqr_val\n",
    "        upper = q3 + 1.5 * iqr_val\n",
    "        outlier_counts.append(np.sum((col < lower) | (col > upper)))\n",
    "    n_ann = emotion_matrix.shape[0]\n",
    "    n_emotions = emotion_matrix.shape[1]\n",
    "    # Aggregate across emotions\n",
    "    model_stats.append({\n",
    "        'Model': model,\n",
    "        'Mean': np.nanmean(means),\n",
    "        'Median': np.nanmean(medians),\n",
    "        'IQR': np.nanmean(iqrs),\n",
    "        'Nonzero/ann': np.mean(n_nonzero_per_ann),\n",
    "        'Outlier frac': np.sum(outlier_counts) / (n_ann * n_emotions)\n",
    "    })\n",
    "\n",
    "# Create DataFrame and format as LaTeX table\n",
    "stats_df = pd.DataFrame(model_stats)\n",
    "stats_df = stats_df.sort_values(by='Mean', ascending=False)\n",
    "latex_table = stats_df.to_latex(index=False, caption=\"Averaged emotion statistics per model\", label=\"tab:model_emotion_stats\", float_format=\"%.3f\")\n",
    "print(latex_table)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e1ded4dd",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import ast # For safely evaluating string representations of dicts\n",
    "from scipy.stats import f_oneway, kruskal # For ANOVA and Kruskal-Wallis\n",
    "\n",
    "# --- 1. Parse the 'emotions' column ---\n",
    "# Ensure it's safe by using ast.literal_eval\n",
    "try:\n",
    "    df['emotions_dict'] = df['emotions'].apply(ast.literal_eval)\n",
    "except (ValueError, SyntaxError) as e:\n",
    "    print(f\"Error parsing 'emotions' column: {e}\")\n",
    "    print(\"Please ensure the 'emotions' column contains valid Python dictionary strings.\")\n",
    "    exit()\n",
    "\n",
    "# --- 2. Transform data to long format (one row per emotion score) ---\n",
    "# This dataframe will be used for calculating p-values\n",
    "rows_list = []\n",
    "for index, row in df.iterrows():\n",
    "    annotator = row['annotator']\n",
    "    # audio_id = row['audioId'] # Not strictly needed for p-value calc per emotion\n",
    "    for emotion, score in row['emotions_dict'].items():\n",
    "        rows_list.append({'annotator': annotator, 'emotion': emotion, 'score': score})\n",
    "\n",
    "long_df = pd.DataFrame(rows_list)\n",
    "# print(\"Long format DataFrame for p-value calculation:\\n\", long_df.head())\n",
    "\n",
    "# --- 3. Group by emotion and annotator, then pivot ---\n",
    "# This gives mean scores for each emotion by each annotator\n",
    "# This is what will primarily form the columns of your table\n",
    "mean_scores_by_annotator_emotion = long_df.groupby(['emotion', 'annotator'])['score'].mean().unstack()\n",
    "\n",
    "# Rename columns for clarity if needed (e.g., 'Model 1' instead of 'Hume Voice')\n",
    "# For now, we'll use the annotator names directly.\n",
    "# You can create a mapping: model_name_mapping = {'Hume Voice': 'Model 1', 'Model Alpha': 'Model 2'}\n",
    "# mean_scores_by_annotator_emotion = mean_scores_by_annotator_emotion.rename(columns=model_name_mapping)\n",
    "\n",
    "# --- 4. Calculate 'Mean' and 'Variance' across models for each emotion ---\n",
    "# mean across the annotator columns\n",
    "mean_scores_by_annotator_emotion['Mean'] = mean_scores_by_annotator_emotion.mean(axis=1)\n",
    "# variance across the annotator columns\n",
    "mean_scores_by_annotator_emotion['Variance'] = mean_scores_by_annotator_emotion.var(axis=1)\n",
    "\n",
    "# --- 5. Calculate 'p-value' for difference between groups (annotators) for each emotion ---\n",
    "p_values = {}\n",
    "for emotion in long_df['emotion'].unique():\n",
    "    emotion_data = long_df[long_df['emotion'] == emotion]\n",
    "    \n",
    "    # Create a list of score arrays, one for each annotator\n",
    "    groups = []\n",
    "    annotators_for_emotion = emotion_data['annotator'].unique()\n",
    "    \n",
    "    # Ensure we have at least 2 groups to compare\n",
    "    if len(annotators_for_emotion) < 2:\n",
    "        p_values[emotion] = float('nan') # Not enough groups to compare\n",
    "        continue\n",
    "        \n",
    "    for annotator in annotators_for_emotion:\n",
    "        scores = emotion_data[emotion_data['annotator'] == annotator]['score'].values\n",
    "        if len(scores) > 0: # Ensure there are scores for this annotator for this emotion\n",
    "             groups.append(scores)\n",
    "    \n",
    "    if len(groups) < 2: # Still not enough groups with actual data\n",
    "        p_values[emotion] = float('nan')\n",
    "        continue\n",
    "\n",
    "    # Perform ANOVA if data is somewhat normal and variances are similar (assumption)\n",
    "    # Otherwise, Kruskal-Wallis (non-parametric) is safer\n",
    "    # For simplicity, let's use Kruskal-Wallis as it has fewer assumptions.\n",
    "    # If you have many samples per group and believe normality holds, f_oneway is an option.\n",
    "    \n",
    "    # Check if all groups have sufficient samples (Kruskal-Wallis usually needs at least 5 per group for good power,\n",
    "    # but will run with fewer. ANOVA also has sample size considerations)\n",
    "    # For this example, we'll run it even with small sample sizes.\n",
    "    \n",
    "    # Ensure all groups are non-empty before passing to kruskal\n",
    "    valid_groups = [g for g in groups if len(g) > 0]\n",
    "    \n",
    "    if len(valid_groups) >= 2: # Need at least two non-empty groups\n",
    "        try:\n",
    "            # Test if all groups are identical (Kruskal might raise error)\n",
    "            # This is a simple check, more robust checks might be needed\n",
    "            all_identical = True\n",
    "            first_group_tuple = tuple(sorted(valid_groups[0]))\n",
    "            for g_idx in range(1, len(valid_groups)):\n",
    "                if tuple(sorted(valid_groups[g_idx])) != first_group_tuple:\n",
    "                    all_identical = False\n",
    "                    break\n",
    "            \n",
    "            if all_identical and len(valid_groups) > 1:\n",
    "                 # If all groups are identical, p-value should be 1 (no difference)\n",
    "                 # Kruskal might return NaN or error if all groups are exactly the same small sample.\n",
    "                 stat, p_value = None, 1.0\n",
    "            else:\n",
    "                stat, p_value = kruskal(*valid_groups)\n",
    "            p_values[emotion] = p_value\n",
    "        except ValueError as e:\n",
    "            # print(f\"Could not compute Kruskal-Wallis for emotion '{emotion}': {e}\")\n",
    "            # This can happen if all values within groups are identical, or too few samples.\n",
    "            p_values[emotion] = float('nan') \n",
    "    else:\n",
    "        p_values[emotion] = float('nan')\n",
    "\n",
    "# Add p-values to the table\n",
    "mean_scores_by_annotator_emotion['p-value (K-W)'] = pd.Series(p_values)\n",
    "\n",
    "# --- 6. Prepare for LaTeX output ---\n",
    "# Sort columns: models first, then Mean, Variance, p-value\n",
    "model_cols = [col for col in mean_scores_by_annotator_emotion.columns if col not in ['Mean', 'Variance', 'p-value (K-W)']]\n",
    "ordered_cols = model_cols + ['Mean', 'Variance', 'p-value (K-W)']\n",
    "final_table_df = mean_scores_by_annotator_emotion[ordered_cols]\n",
    "\n",
    "# Rename index to 'Emotion' for the table\n",
    "final_table_df.index.name = 'Emotion'\n",
    "\n",
    "# Format floating point numbers for LaTeX\n",
    "float_format_func = lambda x: f\"{x:.2f}\" if pd.notnull(x) else \"NaN\"\n",
    "\n",
    "# Generate LaTeX table\n",
    "# Ensure column names are LaTeX-friendly (e.g., escape underscores if any model names have them)\n",
    "# For this example, our model names ('Hume Voice', 'Model Alpha', 'Model Beta') are fine.\n",
    "# If you had 'Model_Alpha', you'd need to replace '_' with '\\_'.\n",
    "# final_table_df.columns = [col.replace('_', '\\\\_') for col in final_table_df.columns]\n",
    "\n",
    "\n",
    "# Define column alignment: 'l' for emotion, 'r' for numeric values\n",
    "num_cols = len(final_table_df.columns)\n",
    "col_format = \"l\" + \"r\" * num_cols \n",
    "\n",
    "try:\n",
    "    latex_table = final_table_df.to_latex(\n",
    "        float_format=float_format_func,\n",
    "        column_format=col_format,\n",
    "        na_rep='NaN', # Representation of NaN in the table\n",
    "        escape=True,  # Escapes LaTeX special characters (like %)\n",
    "        caption=\"Comparison of Emotion Scores by Annotator Model\",\n",
    "        label=\"tab:emotion_comparison\"\n",
    "    )\n",
    "    print(\"\\n--- LaTeX Table ---\")\n",
    "    print(latex_table)\n",
    "except Exception as e:\n",
    "    print(f\"Error generating LaTeX table: {e}\")\n",
    "    print(\"Ensure your column names do not contain problematic characters for LaTeX or use escape=False with caution.\")\n",
    "\n",
    "\n",
    "print(\"\\n--- Final DataFrame for Table (for inspection) ---\")\n",
    "print(final_table_df.to_string(float_format=float_format_func))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7c9f89e3",
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "\n",
    "# Prepare data for radar plot: mean per model per emotion\n",
    "emotion_order = final_table_df.index.tolist()\n",
    "model_names = [col for col in final_table_df.columns if col not in ['Mean', 'Variance', 'p-value (K-W)']]\n",
    "\n",
    "def get_emotion_stats(df, model_names, emotion_order):\n",
    "    # Parse emotion dicts if not already present\n",
    "    if 'emotions_dict' not in df.columns:\n",
    "        df = df.copy()\n",
    "        df['emotions_dict'] = df['emotions'].apply(ast.literal_eval)\n",
    "    stats = {model: {'mean': [], 'q1': [], 'q3': []} for model in model_names}\n",
    "    for emotion in emotion_order:\n",
    "        for model in model_names:\n",
    "            scores = []\n",
    "            for d, a in zip (df['emotions_dict'], df['annotator']):\n",
    "                if a == model and emotion in d:\n",
    "                    scores.append(d[emotion])\n",
    "            if scores:\n",
    "                arr = np.array(scores)\n",
    "                stats[model]['mean'].append(np.mean(arr))\n",
    "                stats[model]['q1'].append(np.percentile(arr, 25))\n",
    "                stats[model]['q3'].append(np.percentile(arr, 75))\n",
    "            else:\n",
    "                stats[model]['mean'].append(np.nan)\n",
    "                stats[model]['q1'].append(np.nan)\n",
    "                stats[model]['q3'].append(np.nan)\n",
    "    return stats\n",
    "\n",
    "stats = get_emotion_stats(df, model_names, emotion_order)\n",
    "\n",
    "# Sort emotions by descending average value across all models\n",
    "mean_scores = final_table_df['Mean']\n",
    "sorted_emotions = mean_scores.sort_values(ascending=False).index.tolist()\n",
    "\n",
    "# Radar plot setup\n",
    "num_vars = len(sorted_emotions)\n",
    "angles = np.linspace(0, 2 * np.pi, num_vars, endpoint=False).tolist()\n",
    "angles += angles[:1]\n",
    "\n",
    "plt.figure(figsize=(12, 12))\n",
    "ax = plt.subplot(111, polar=True)\n",
    "colors = plt.cm.tab10.colors\n",
    "\n",
    "for idx, model in enumerate(model_names):\n",
    "    means = [stats[model]['mean'][emotion_order.index(e)] for e in sorted_emotions]\n",
    "    means = [np.log1p(x) if x is not None and not np.isnan(x) else np.nan for x in means]\n",
    "    means += means[:1]\n",
    "    ax.plot(angles, means, label=model, color=colors[idx % len(colors)], linewidth=2)\n",
    "    ax.fill(angles, means, color=colors[idx % len(colors)], alpha=0.1)\n",
    "\n",
    "# Add emotion labels with improved spacing and line break after \"/\"\n",
    "ax.set_xticks(angles[:-1])\n",
    "for label, angle in zip(sorted_emotions, angles[:-1]):\n",
    "    pretty_label = label.replace(\"/\", \"/\\n\")\n",
    "    rotation = np.degrees(angle)\n",
    "    if rotation > 90 and rotation < 270:\n",
    "        alignment = \"right\"\n",
    "        rotation = rotation + 180\n",
    "    else:\n",
    "        alignment = \"left\"\n",
    "    ax.text(\n",
    "        angle,\n",
    "        ax.get_rmax() + 0.1,\n",
    "        pretty_label,\n",
    "        size=11,\n",
    "        horizontalalignment=alignment,\n",
    "        verticalalignment=\"center\",\n",
    "        rotation=rotation,\n",
    "        rotation_mode=\"anchor\"\n",
    "    )\n",
    "ax.set_xticklabels([])\n",
    "ax.set_yticklabels([])\n",
    "\n",
    "plt.tight_layout()\n",
    "\n",
    "plt.legend(\n",
    "    loc='lower right',\n",
    "    bbox_to_anchor=(1.25, -0.15)\n",
    ")\n",
    "plt.savefig('./output/emotion_scores_by_model_radar_log1p.png', dpi=600, bbox_inches='tight')\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
