{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 166,
   "id": "3c0fb20b-d3ef-4172-a26c-8e6d8ebde711",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.9051422865701447, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]\n",
      "98\n",
      "0.9990418412784863\n",
      "0.9051422865701447\n"
     ]
    }
   ],
   "source": [
    "# Code to evaluate the performance of clustering algorithms. \n",
    "\n",
    "import pandas as pd\n",
    "from sklearn.metrics import adjusted_rand_score\n",
    "\n",
    "file = '~/CLFL-main/large0/logs/server_results.csv'\n",
    "# read the CSV file\n",
    "df = pd.read_csv(file, delimiter=',', header=0)\n",
    "\n",
    "# loop through each row\n",
    "scores = []\n",
    "for index, row in df.iterrows():\n",
    "    # convert clusters and concepts to lists of integers\n",
    "    if ',' in row['user_clusters']:\n",
    "        clusters = list(map(int, row['user_clusters'][1:-1].split(',')))\n",
    "    else:    \n",
    "        clusters = list(map(int, row['user_clusters'][1:-1].split()))\n",
    "    concepts = list(map(int, row['user_concepts'][1:-1].split(',')))\n",
    "    \n",
    "    # compute adjusted_rand_score\n",
    "    if index==0:\n",
    "        continue\n",
    "    score = adjusted_rand_score(concepts, clusters)\n",
    "    scores.append(score)\n",
    "    # print the score for this row\n",
    "my_list = scores    \n",
    "print(scores)\n",
    "# count_not_one = len(my_list) - my_list.count(1)\n",
    "print(my_list.count(1))\n",
    "print((sum(my_list))/ len(my_list))\n",
    "print(min(my_list))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 170,
   "id": "6cdf63dd-1d0e-42cc-8884-152d8e284995",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[383, 392, 391, 409, 405]\n",
      "[383, 392, 391, 409, 405]\n",
      "1.0\n"
     ]
    }
   ],
   "source": [
    "# Code to test concept matching accuracy\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "directory_path = \"/l/data/xj8/CLFL-main/small\"\n",
    "n_concept =5\n",
    "\n",
    "file = '%s/server_results.csv'%directory_path\n",
    "# read the CSV file\n",
    "df = pd.read_csv(file, delimiter=',', header=0)\n",
    "\n",
    "# loop through each row\n",
    "def my_contingency_matrix(n, labels_true, labels_pred):\n",
    "    n_classes = len(np.unique(labels_true))\n",
    "    n_clusters = len(np.unique(labels_pred))\n",
    "    \n",
    "    contingency = np.zeros((n, n))\n",
    "    \n",
    "    for i in range(len(labels_true)):\n",
    "        if labels_pred[i]>=n:\n",
    "            continue\n",
    "        contingency[labels_true[i], labels_pred[i]] += 1\n",
    "    \n",
    "    return contingency\n",
    "            \n",
    "scores = []\n",
    "for index, row in df.iterrows():\n",
    "    # convert clusters and concepts to lists of integers\n",
    "    if ',' in row['user_clusters']:\n",
    "        clusters = list(map(int, row['user_clusters'][1:-1].split(',')))\n",
    "    else:    \n",
    "        clusters = list(map(int, row['user_clusters'][1:-1].split()))\n",
    "    concepts = list(map(int, row['user_concepts'][1:-1].split(',')))\n",
    "    \n",
    "    # compute adjusted_rand_score\n",
    "    m = my_contingency_matrix(n_concept, concepts, clusters)\n",
    "    # print(m)\n",
    "    mapping = np.argmax(m, axis=1)\n",
    "    scores.append(mapping)\n",
    "\n",
    "total = [0] *5\n",
    "correct = [0] *5\n",
    "# Iterate through all files in the directory\n",
    "for filename in os.listdir(directory_path):\n",
    "    # Check if the file name starts with \"cid\"\n",
    "    if filename.startswith('cid'):\n",
    "        # print(filename)\n",
    "        c_df = pd.read_csv('%s/%s'%(directory_path, filename), delimiter=',', header=0)\n",
    "        for index, row in c_df.iterrows():\n",
    "            if index==0:\n",
    "                continue\n",
    "            # print(index)    \n",
    "            cluster_match_str = df.iloc[index]['cluster_match']\n",
    "            cluster_match = list(map(int, cluster_match_str[1:-1].split(',')))\n",
    "            data_id = int(row['data_concept_id'])\n",
    "            total[data_id] =  total[data_id]+1 \n",
    "            if cluster_match[scores[index][data_id]] == int(row['model_id']):\n",
    "                correct[data_id] = correct[data_id]+1\n",
    "                \n",
    "print(total)\n",
    "print(correct)\n",
    "print((sum(correct))/(sum(total)))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
