{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# Result variant to analyse and the sample size; both are spliced into the\n",
    "# result file names below.\n",
    "kword = \"arrow\"\n",
    "siz = \"2000\"\n",
    "\n",
    "# Column of each result CSV that is read for a given kword.\n",
    "column_by_kword = {\"skel\": 0, \"arrow\": 1}\n",
    "if kword not in column_by_kword:\n",
    "    # Fail loudly rather than leaving `i` unset, or stale from an earlier run.\n",
    "    raise ValueError(f\"kword must be one of {sorted(column_by_kword)}, got {kword!r}\")\n",
    "i = column_by_kword[kword]\n",
    "\n",
    "\n",
    "def load_scores(algo, infix=\"\"):\n",
    "    \"\"\"Return column `i` of one headerless result CSV as a Python list.\n",
    "\n",
    "    The file read is\n",
    "    set1/<algo>_f1_<kword><infix>_n_30_dom_2_den_2_samples_<siz>_test_chisq.csv,\n",
    "    where `kword`, `siz` and `i` are the module-level settings above.\n",
    "\n",
    "    algo:  file-name prefix of the algorithm (e.g. \"ges\").\n",
    "    infix: extra token inserted right after kword; used by the kPC files.\n",
    "    \"\"\"\n",
    "    path = (\n",
    "        \"set1/\" + algo + \"_f1_\" + kword + infix\n",
    "        + \"_n_30_dom_2_den_2_samples_\" + siz + \"_test_chisq.csv\"\n",
    "    )\n",
    "    rows = pd.read_csv(path, header=None).values.tolist()\n",
    "    return [row[i] for row in rows]\n",
    "\n",
    "\n",
    "ipc_list = load_scores(\"cpctheta\")\n",
    "ipcpath_list = load_scores(\"cpcpath\")\n",
    "ges_list = load_scores(\"ges\")\n",
    "grasp_list = load_scores(\"grasp\")\n",
    "# The kPC file names put \"k_<n>\" directly after kword with no underscore in\n",
    "# between (e.g. ...arrowk_0_n_30...); reproduced from the original paths.\n",
    "# NOTE(review): confirm this matches the files on disk.\n",
    "kpc0_list = load_scores(\"kpc\", \"k_0\")\n",
    "kpc1_list = load_scores(\"kpc\", \"k_1\")\n",
    "kpc2_list = load_scores(\"kpc\", \"k_2\")\n",
    "\n",
    "# Key order becomes the DataFrame column order.\n",
    "a = {\n",
    "    'GES': ges_list,\n",
    "    'GRASP': grasp_list,\n",
    "    'kPC2': kpc2_list,\n",
    "    'kPC1': kpc1_list,\n",
    "    'kPC0': kpc0_list,\n",
    "    'CPC': ipc_list,\n",
    "    'CPC-Path': ipcpath_list\n",
    "}\n",
    "df = pd.DataFrame(a, columns=list(a.keys()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "TtestResult(statistic=-14.041619828513545, pvalue=3.233070383244574e-39, df=598.0)\n",
      "Comparing GES and CPC:\n",
      "GES Avg1 and std: 0.22918598911044447, 0.007936313418245074\n",
      "CPC Avg1 and std: 0.35168875892913737, 0.003588031080163277\n",
      "T-statistic: -14.041619828513545\n",
      "P-value: 3.233070383244574e-39\n",
      "The difference in means is statistically significant.\n",
      "\n",
      "TtestResult(statistic=-19.456520944968748, pvalue=5.385216056547379e-66, df=598.0)\n",
      "Comparing GRASP and CPC:\n",
      "GRASP Avg1 and std: 0.19024582348715335, 0.007466400028497868\n",
      "CPC Avg1 and std: 0.35168875892913737, 0.003588031080163277\n",
      "T-statistic: -19.456520944968748\n",
      "P-value: 5.385216056547379e-66\n",
      "The difference in means is statistically significant.\n",
      "\n",
      "TtestResult(statistic=-40.62787704691613, pvalue=1.9487047506811364e-174, df=598.0)\n",
      "Comparing kPC2 and CPC:\n",
      "kPC2 Avg1 and std: 0.1207541947578925, 0.004396338112624566\n",
      "CPC Avg1 and std: 0.35168875892913737, 0.003588031080163277\n",
      "T-statistic: -40.62787704691613\n",
      "P-value: 1.9487047506811364e-174\n",
      "The difference in means is statistically significant.\n",
      "\n",
      "TtestResult(statistic=-22.15200529870826, pvalue=3.813173087971929e-80, df=598.0)\n",
      "Comparing kPC1 and CPC:\n",
      "kPC1 Avg1 and std: 0.21691107666467319, 0.0049010588890054515\n",
      "CPC Avg1 and std: 0.35168875892913737, 0.003588031080163277\n",
      "T-statistic: -22.15200529870826\n",
      "P-value: 3.813173087971929e-80\n",
      "The difference in means is statistically significant.\n",
      "\n",
      "TtestResult(statistic=-6.135004344173039, pvalue=7.748079245971304e-10, df=598.0)\n",
      "Comparing kPC0 and CPC:\n",
      "kPC0 Avg1 and std: 0.3245050412454726, 0.0025872084578327645\n",
      "CPC Avg1 and std: 0.35168875892913737, 0.003588031080163277\n",
      "T-statistic: -6.135004344173039\n",
      "P-value: 7.748079245971304e-10\n",
      "The difference in means is statistically significant.\n",
      "\n"
     ]
    }
   ],
   "source": [
    "import itertools\n",
    "from scipy import stats\n",
    "import numpy as np\n",
    "\n",
    "\n",
    "# Significance level; constant, so it is set once rather than per iteration.\n",
    "alpha = 0.05\n",
    "columns = df.columns\n",
    "# Walk every column pair but keep only (other, 'CPC'): each column listed\n",
    "# before 'CPC' in df is tested against it, all other pairs are skipped.\n",
    "for col1, col2 in itertools.combinations(columns, 2):\n",
    "    if col1 == 'CPC' or col2 != 'CPC':\n",
    "        continue\n",
    "    data1 = df[col1]\n",
    "    data2 = df[col2]\n",
    "\n",
    "    # One-sided two-sample t-test (alternative='less': mean of col1 below\n",
    "    # mean of col2). Run once and reuse the result for printing and unpacking.\n",
    "    result = stats.ttest_ind(data1, data2, alternative='less')\n",
    "    print(result)\n",
    "    t_stat, p_value = result.statistic, result.pvalue\n",
    "\n",
    "    # Print the results\n",
    "    print(f'Comparing {col1} and {col2}:')\n",
    "    for name, data in ((col1, data1), (col2, data2)):\n",
    "        colavg = np.mean(data)\n",
    "        # Standard deviation divided by sqrt(n), i.e. the standard error of\n",
    "        # the mean, although the printed label says 'std'.\n",
    "        colstd = np.std(data)/np.sqrt(len(data))\n",
    "        print(f'{name} Avg1 and std: {colavg}, {colstd}')\n",
    "    print(f'T-statistic: {t_stat}')\n",
    "    print(f'P-value: {p_value}')\n",
    "\n",
    "    # Interpret the results\n",
    "    if p_value < alpha:\n",
    "        print(\"The difference in means is statistically significant.\\n\")\n",
    "    else:\n",
    "        print(\"The difference in means is not statistically significant.\\n\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "TtestResult(statistic=-3.320344712185242, pvalue=0.0004768973687228865, df=598.0)\n",
      "Comparing CPC-Path and CPC:\n",
      "CPC-Path Avg1 and std: 0.3349415980549827, 0.003532870008664008\n",
      "CPC Avg1 and std: 0.35168875892913737, 0.003588031080163277\n",
      "T-statistic: -3.320344712185242\n",
      "P-value: 0.0004768973687228865\n",
      "The difference in means is statistically significant.\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# One-off comparison of CPC-Path against CPC.\n",
    "# Relies on `df`, `stats` and `np` defined by earlier cells.\n",
    "col1 ='CPC-Path'\n",
    "col2 = 'CPC'\n",
    "data1 = df[col1]\n",
    "data2 = df[col2]\n",
    "\n",
    "# One-sided two-sample t-test (alternative='less': mean of col1 below mean\n",
    "# of col2). Run once and reuse the result for printing and unpacking.\n",
    "result = stats.ttest_ind(data1, data2, alternative='less')\n",
    "print(result)\n",
    "t_stat, p_value = result.statistic, result.pvalue\n",
    "\n",
    "# Print the results\n",
    "print(f'Comparing {col1} and {col2}:')\n",
    "for name, data in ((col1, data1), (col2, data2)):\n",
    "    colavg = np.mean(data)\n",
    "    # Standard deviation divided by sqrt(n), i.e. the standard error of the\n",
    "    # mean, although the printed label says 'std'.\n",
    "    colstd = np.std(data)/np.sqrt(len(data))\n",
    "    print(f'{name} Avg1 and std: {colavg}, {colstd}')\n",
    "print(f'T-statistic: {t_stat}')\n",
    "print(f'P-value: {p_value}')\n",
    "\n",
    "# Interpret the results\n",
    "alpha = 0.05  # significance level\n",
    "if p_value < alpha:\n",
    "    print(\"The difference in means is statistically significant.\\n\")\n",
    "else:\n",
    "    print(\"The difference in means is not statistically significant.\\n\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "conditionally-closed",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
