{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import pandas as pd\n",
    "import ast\n",
    "import numpy as np\n",
    "# Read the response log from the CSV file (path is relative to the working directory)\n",
    "response_data = pd.read_csv('responses_difficulty_time.csv')\n",
    "\n",
    "# Collect the distinct values found in the 'treatment' column\n",
    "treatment_column = response_data['treatment']\n",
    "unique_treatments = treatment_column.unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show the current value of unique_treatments as this cell's output\n",
    "unique_treatments"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Preview the first 18 rows of response_data\n",
    "response_data.head(18)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "# Read the response log from the CSV file\n",
    "response_data = pd.read_csv('responses_difficulty_time.csv')\n",
    "\n",
    "# Mean of the 'time' column within each treatment group\n",
    "time_by_treatment = response_data.groupby('treatment')['time']\n",
    "average_time_by_treatment = time_by_treatment.mean()\n",
    "\n",
    "# Print the averages from smallest to largest (sort_values sorts ascending by default)\n",
    "print(average_time_by_treatment.sort_values())\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Read the response log; its 'page', 'treatment' and 'time' columns are used later in this cell\n",
    "data = pd.read_csv('responses_difficulty_time.csv')\n",
    "\n",
    "# Percentile bootstrap for the mean of a 1-D sample\n",
    "def bootstrap_ci(data, n_bootstraps=1000, ci=95, rng=None):\n",
    "    \"\"\"Return the sample mean with a percentile-bootstrap confidence interval.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    data : 1-D array-like of numbers\n",
    "        Observations to resample with replacement.\n",
    "    n_bootstraps : int, default 1000\n",
    "        Number of bootstrap resamples to draw.\n",
    "    ci : float, default 95\n",
    "        Confidence level in percent; the bounds are the (100-ci)/2 and\n",
    "        100-(100-ci)/2 percentiles of the resampled means.\n",
    "    rng : object with a NumPy-style choice method, optional\n",
    "        Source of randomness, e.g. np.random.default_rng(seed). When omitted,\n",
    "        NumPy's global random state is used, so np.random.seed still applies.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    tuple\n",
    "        (mean, lower_bound, upper_bound); all three are NaN for empty input.\n",
    "    \"\"\"\n",
    "    values = np.asarray(data, dtype=float)\n",
    "    if values.size == 0:\n",
    "        # Nothing to resample: report NaNs instead of letting the sampler raise\n",
    "        return np.nan, np.nan, np.nan\n",
    "    sampler = np.random if rng is None else rng\n",
    "    # A single draw of shape (n_bootstraps, n) replaces the Python-level loop;\n",
    "    # each row is one resample of the same size as the input\n",
    "    resamples = sampler.choice(values, size=(n_bootstraps, values.size), replace=True)\n",
    "    bootstrap_means = resamples.mean(axis=1)\n",
    "    tail = (100 - ci) / 2\n",
    "    lower_bound, upper_bound = np.percentile(bootstrap_means, [tail, 100 - tail])\n",
    "    return values.mean(), lower_bound, upper_bound\n",
    "\n",
    "# Seed NumPy's global random state so the bootstrap intervals are identical on every run\n",
    "BOOTSTRAP_SEED = 42\n",
    "np.random.seed(BOOTSTRAP_SEED)\n",
    "\n",
    "# Split rows by page type. na=False treats a missing page name as a non-match\n",
    "# instead of producing an NA mask that cannot be used for filtering.\n",
    "tutorials = data[data['page'].str.contains('tutorial', na=False)]\n",
    "questions = data[data['page'].str.contains('question', na=False)]\n",
    "\n",
    "# One (mean, lower, upper) tuple per treatment, computed by bootstrap_ci\n",
    "tutorial_stats = tutorials.groupby('treatment')['time'].apply(bootstrap_ci)\n",
    "question_stats = questions.groupby('treatment')['time'].apply(bootstrap_ci)\n",
    "\n",
    "# Expand the tuples into named columns, keeping the treatment index\n",
    "stat_columns = ['Mean', 'Lower CI', 'Upper CI']\n",
    "tutorial_df = pd.DataFrame(tutorial_stats.tolist(), index=tutorial_stats.index, columns=stat_columns)\n",
    "question_df = pd.DataFrame(question_stats.tolist(), index=question_stats.index, columns=stat_columns)\n",
    "\n",
    "# Tutorial and question statistics side by side, aligned on treatment\n",
    "combined_df = pd.DataFrame({\n",
    "    'Tutorial Mean': tutorial_df['Mean'],\n",
    "    'Tutorial Lower CI': tutorial_df['Lower CI'],\n",
    "    'Tutorial Upper CI': tutorial_df['Upper CI'],\n",
    "    'Question Mean': question_df['Mean'],\n",
    "    'Question Lower CI': question_df['Lower CI'],\n",
    "    'Question Upper CI': question_df['Upper CI']\n",
    "})\n",
    "\n",
    "# Treatment labels as they appear in the data, in the order the bars are drawn\n",
    "unique_treatments = ['top-none', 'top-top', 'top-subset3', 'top-rank', \n",
    "                     'subset2-subset2', 'subset3-rank', 'rank-none', 'rank-top', 'rank-rank']\n",
    "combined_df = combined_df.reindex(unique_treatments)\n",
    "\n",
    "# Labels shown on the x-axis, position-for-position with unique_treatments\n",
    "display_labels = ['Top_None', 'Top_Top', 'Top_3-Approval', 'Top_Rank', \n",
    "                  '2-Approval_2-Approval', '3-Approval_Rank', 'Rank_None', 'Rank_Top', 'Rank_Rank']\n",
    "assert len(display_labels) == len(unique_treatments), 'one display label is needed per treatment'\n",
    "\n",
    "fig, ax = plt.subplots(figsize=(14, 7))\n",
    "\n",
    "# Asymmetric error bars for yerr: first entry is the distance from the mean down to the\n",
    "# lower bound, second entry is the distance from the mean up to the upper bound\n",
    "tutorial_errors = [combined_df['Tutorial Mean'] - combined_df['Tutorial Lower CI'], combined_df['Tutorial Upper CI'] - combined_df['Tutorial Mean']]\n",
    "question_errors = [combined_df['Question Mean'] - combined_df['Question Lower CI'], combined_df['Question Upper CI'] - combined_df['Question Mean']]\n",
    "\n",
    "ind = np.arange(len(combined_df))  # one x position per treatment\n",
    "width = 0.35  # bar width; the two bars of a pair sit half a width either side of the tick\n",
    "\n",
    "# Tutorial bar to the left of each tick, question bar to the right\n",
    "rects1 = ax.bar(ind - width/2, combined_df['Tutorial Mean'], width, yerr=tutorial_errors, label='Tutorials', capsize=4, color='skyblue')\n",
    "rects2 = ax.bar(ind + width/2, combined_df['Question Mean'], width, yerr=question_errors, label='Questions', capsize=4, color='orange')\n",
    "\n",
    "# Axis labels, one tick per treatment, and the legend\n",
    "ax.set_xlabel('Elicitation Format', fontsize=14)\n",
    "ax.set_ylabel('Time (seconds)', fontsize=14)\n",
    "ax.set_xticks(ind)\n",
    "ax.set_xticklabels(display_labels, rotation=45, fontsize=12)\n",
    "ax.legend()\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show question_df (the Mean / Lower CI / Upper CI table for question pages) as this cell's output\n",
    "question_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from ast import literal_eval\n",
    "\n",
    "response_data = pd.read_csv('responses_difficulty_time.csv')\n",
    "\n",
    "# Keep only the 'review-1' and 'review-2' pages. The explicit .copy() makes review_data an\n",
    "# independent frame, so the column assignments below do not write to a filtered slice of\n",
    "# response_data (which triggers pandas' SettingWithCopyWarning).\n",
    "review_data = response_data[response_data['page'].isin(['review-1', 'review-2'])].copy()\n",
    "\n",
    "# The 'response' column holds the text of a list; literal_eval parses it into a real list\n",
    "review_data['response'] = review_data['response'].apply(literal_eval)\n",
    "\n",
    "# Element 0 of each parsed list becomes 'ease', element 1 becomes 'amount'\n",
    "review_data['ease'] = review_data['response'].apply(lambda x: x[0])\n",
    "review_data['amount'] = review_data['response'].apply(lambda x: x[1])\n",
    "\n",
    "# Count each answer per treatment: treatments as rows, answers as columns, 0 where an\n",
    "# answer never occurs for a treatment\n",
    "ease_frequencies = review_data.groupby('treatment')['ease'].value_counts().unstack(fill_value=0)\n",
    "amount_frequencies = review_data.groupby('treatment')['amount'].value_counts().unstack(fill_value=0)\n",
    "\n",
    "# Display the frequencies\n",
    "print(\"Ease of Elicitation Frequencies:\")\n",
    "print(ease_frequencies)\n",
    "print(\"\\nAmount of Information Frequencies:\")\n",
    "print(amount_frequencies)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "from ast import literal_eval\n",
    "from matplotlib.ticker import MaxNLocator\n",
    "\n",
    "# Read the response log from the CSV file\n",
    "response_data = pd.read_csv('responses_difficulty_time.csv')\n",
    "\n",
    "# Fixed treatment order applied to every per-treatment table in this cell\n",
    "unique_treatments = [\n",
    "    'top-none', 'top-top', 'top-subset3', 'top-rank',\n",
    "    'subset2-subset2', 'subset3-rank', 'rank-none', 'rank-top', 'rank-rank',\n",
    "]\n",
    "\n",
    "# Mean 'time' per treatment, arranged in that order\n",
    "time_by_treatment = response_data.groupby('treatment')['time']\n",
    "average_time_by_treatment = time_by_treatment.mean().reindex(unique_treatments)\n",
    "\n",
    "# Independent copy of the rows whose page is 'review-1' or 'review-2'\n",
    "on_review_page = response_data['page'].isin(['review-1', 'review-2'])\n",
    "review_data = response_data[on_review_page].copy()\n",
    "\n",
    "# Each 'response' cell holds the text of a list; parse it into a real list\n",
    "review_data.loc[:, 'response'] = review_data['response'].apply(literal_eval)\n",
    "\n",
    "# Element 0 of the parsed list becomes 'ease', element 1 becomes 'amount'\n",
    "review_data.loc[:, 'ease'] = review_data['response'].apply(lambda parsed: parsed[0])\n",
    "review_data.loc[:, 'amount'] = review_data['response'].apply(lambda parsed: parsed[1])\n",
    "\n",
    "# Answer counts per treatment: treatments as rows, answers as columns\n",
    "reviews_by_treatment = review_data.groupby('treatment')\n",
    "ease_frequencies = reviews_by_treatment['ease'].value_counts().unstack(fill_value=0)\n",
    "amount_frequencies = reviews_by_treatment['amount'].value_counts().unstack(fill_value=0)\n",
    "\n",
    "# Five colours shared by both charts\n",
    "common_palette = sns.color_palette('deep', n_colors=5)\n",
    "\n",
    "# Column order for the two answer scales\n",
    "ease_order = ['Very Easy', 'Easy', 'Neutral', 'Difficult', 'Very Difficult']\n",
    "amount_order = ['Very Little', 'Little', 'Adequate', 'Significant', 'Very Significant']\n",
    "ease_frequencies = ease_frequencies.reindex(columns=ease_order)\n",
    "amount_frequencies = amount_frequencies.reindex(columns=amount_order)\n",
    "\n",
    "# Row order follows unique_treatments\n",
    "ease_frequencies_ordered = ease_frequencies.reindex(unique_treatments)\n",
    "amount_frequencies_ordered = amount_frequencies.reindex(unique_treatments)\n",
    "\n",
    "# Convert counts to percentages of each treatment's review rows\n",
    "total_responses_per_treatment = reviews_by_treatment.size()\n",
    "ease_share = ease_frequencies_ordered.div(total_responses_per_treatment, axis=0)\n",
    "amount_share = amount_frequencies_ordered.div(total_responses_per_treatment, axis=0)\n",
    "ease_frequencies_percentage = (ease_share * 100).reindex(unique_treatments)\n",
    "amount_frequencies_percentage = (amount_share * 100).reindex(unique_treatments)\n",
    "\n",
    "# X-axis labels, position-for-position with unique_treatments\n",
    "display_labels = [\n",
    "    'Top_None', 'Top_Top', 'Top_3-Approval', 'Top_Rank',\n",
    "    '2-Approval_2-Approval', '3-Approval_Rank', 'Rank_None', 'Rank_Top', 'Rank_Rank',\n",
    "]\n",
    "\n",
    "def plot_stacked_percentages(percentages, title, tick_labels, palette, figsize=None):\n",
    "    \"\"\"Draw one stacked bar chart (one bar per row of percentages) and return its Axes.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    percentages : pandas.DataFrame\n",
    "        One row per bar, one column per stacked segment.\n",
    "    title : str\n",
    "        Chart title.\n",
    "    tick_labels : list of str\n",
    "        X-axis labels, one per row of percentages and in the same order.\n",
    "    palette : sequence of colours\n",
    "        Segment colours, passed to DataFrame.plot as color.\n",
    "    figsize : tuple of float, optional\n",
    "        Figure size in inches; None uses matplotlib's default size.\n",
    "    \"\"\"\n",
    "    # Create the figure explicitly and hand its axes to pandas. DataFrame.plot opens a new\n",
    "    # figure whenever no ax is given, so a separate plt.figure(...) call beforehand is never\n",
    "    # drawn on and only shows up as an empty figure.\n",
    "    fig, ax = plt.subplots(figsize=figsize)\n",
    "    percentages.plot(kind='bar', stacked=True, color=palette, width=0.5, ax=ax)\n",
    "    ax.set_title(title, pad=40)\n",
    "    ax.set_ylabel('Percentage of Users')\n",
    "    ax.set_xlabel('Elicitation Format')\n",
    "    # Pin one tick per bar before relabelling. Installing an automatic locator after\n",
    "    # set_xticklabels would move the ticks while the fixed label list stays, so labels\n",
    "    # could end up under the wrong bars.\n",
    "    ax.set_xticks(list(range(len(tick_labels))))\n",
    "    ax.set_xticklabels(tick_labels, rotation=45, fontsize=8)\n",
    "    ax.tick_params(axis='x', pad=1)\n",
    "    # Single-row legend placed above the axes\n",
    "    ax.legend(fontsize='small', title_fontsize='small', loc='upper center', bbox_to_anchor=(0.5, 1.2), ncol=len(percentages.columns))\n",
    "    fig.tight_layout(pad=1)\n",
    "    plt.show()\n",
    "    return ax\n",
    "\n",
    "\n",
    "ax = plot_stacked_percentages(ease_frequencies_percentage, 'Perceived Difficulty', display_labels, common_palette)\n",
    "ax = plot_stacked_percentages(amount_frequencies_percentage, 'Perceived Expressiveness', display_labels, common_palette)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
