{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "from util import Dataset, Algorithm\n",
    "import pickle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "def refine_optimal_cost(folder,out_folder, alg, dataset,file_name):\n",
    "    k_values = [10,20,30,50]\n",
    "    res = pickle.load(open(f\"{folder}/results_{file_name}.pkl\", \"rb\"))\n",
    "    for k in k_values:\n",
    "        subset_sizes = res.subset_sizes\n",
    "        original_cost = res.get(k,subset_sizes[0]).original_cost\n",
    "        best_cost_for_current_k = original_cost\n",
    "        \n",
    "        for subset_size in subset_sizes:\n",
    "            subset_original_cost = res.get(k, subset_size).subset_solution_original\n",
    "            for run in range(res.n_runs):\n",
    "                if subset_original_cost[run] < best_cost_for_current_k:\n",
    "                    best_cost_for_current_k = subset_original_cost[run]\n",
    "        if original_cost > best_cost_for_current_k:\n",
    "            print(f\"original cost: {original_cost}, best cost: {best_cost_for_current_k}, k:{k}\")\n",
    "            res.results[(k, subset_sizes[0])].original_cost= best_cost_for_current_k\n",
    "    pickle.dump(res, open(f\"{out_folder}/results_{file_name}.pkl\", \"wb\"))   \n",
    "\n",
    "# for dataset in Dataset:\n",
    "#     for alg in Algorithm:\n",
    "#         for j in [1,2,5]:\n",
    "#             for z in [1,3,4]:\n",
    "                # refine_optimal_cost(\"start_again\", \"results_refined\", alg, dataset, j, z)\n",
    "# refine_optimal_cost(\"results3\",\"results_refined\",Algorithm.K_Z_SUBSPACE, Dataset.COVTYPE, 2, 3)\n",
    "            "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "original cost: 3149.6386084351275, best cost: 3076.294348392143, k:10\n",
      "original cost: 2844.853450278033, best cost: 2828.7794448382665, k:20\n",
      "original cost: 2702.9497690236108, best cost: 2701.6737818627644, k:30\n",
      "original cost: 2543.8067042711864, best cost: 2518.7282238161206, k:50\n"
     ]
    },
    {
     "ename": "NameError",
     "evalue": "name 'j' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "Cell \u001b[1;32mIn[25], line 3\u001b[0m\n\u001b[0;32m      1\u001b[0m alg \u001b[39m=\u001b[39m Algorithm\u001b[39m.\u001b[39mKMEDIAN\n\u001b[0;32m      2\u001b[0m \u001b[39mfor\u001b[39;00m dataset \u001b[39min\u001b[39;00m Dataset:\n\u001b[1;32m----> 3\u001b[0m     refine_optimal_cost(\u001b[39m\"\u001b[39;49m\u001b[39mresults3\u001b[39;49m\u001b[39m\"\u001b[39;49m,\u001b[39m\"\u001b[39;49m\u001b[39mresults_refined\u001b[39;49m\u001b[39m\"\u001b[39;49m,alg, dataset, \u001b[39mf\u001b[39;49m\u001b[39m\"\u001b[39;49m\u001b[39m{\u001b[39;49;00malg\u001b[39m.\u001b[39;49mvalue\u001b[39m}\u001b[39;49;00m\u001b[39m_\u001b[39;49m\u001b[39m{\u001b[39;49;00mdataset\u001b[39m.\u001b[39;49mvalue\u001b[39m}\u001b[39;49;00m\u001b[39m\"\u001b[39;49m)\n",
      "Cell \u001b[1;32mIn[24], line 17\u001b[0m, in \u001b[0;36mrefine_optimal_cost\u001b[1;34m(folder, out_folder, alg, dataset, file_name)\u001b[0m\n\u001b[0;32m     15\u001b[0m         \u001b[39mprint\u001b[39m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39moriginal cost: \u001b[39m\u001b[39m{\u001b[39;00moriginal_cost\u001b[39m}\u001b[39;00m\u001b[39m, best cost: \u001b[39m\u001b[39m{\u001b[39;00mbest_cost_for_current_k\u001b[39m}\u001b[39;00m\u001b[39m, k:\u001b[39m\u001b[39m{\u001b[39;00mk\u001b[39m}\u001b[39;00m\u001b[39m\"\u001b[39m)\n\u001b[0;32m     16\u001b[0m         res\u001b[39m.\u001b[39mresults[(k, subset_sizes[\u001b[39m0\u001b[39m])]\u001b[39m.\u001b[39moriginal_cost\u001b[39m=\u001b[39m best_cost_for_current_k\n\u001b[1;32m---> 17\u001b[0m pickle\u001b[39m.\u001b[39mdump(res, \u001b[39mopen\u001b[39m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m{\u001b[39;00mout_folder\u001b[39m}\u001b[39;00m\u001b[39m/results_\u001b[39m\u001b[39m{\u001b[39;00malg\u001b[39m.\u001b[39mvalue\u001b[39m}\u001b[39;00m\u001b[39m_\u001b[39m\u001b[39m{\u001b[39;00mdataset\u001b[39m.\u001b[39mvalue\u001b[39m}\u001b[39;00m\u001b[39m_\u001b[39m\u001b[39m{\u001b[39;00mj\u001b[39m}\u001b[39;00m\u001b[39m_\u001b[39m\u001b[39m{\u001b[39;00mz\u001b[39m}\u001b[39;00m\u001b[39m.pkl\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mwb\u001b[39m\u001b[39m\"\u001b[39m))\n",
      "\u001b[1;31mNameError\u001b[0m: name 'j' is not defined"
     ]
    }
   ],
   "source": [
    "alg = Algorithm.KMEDIAN\n",
    "for dataset in Dataset:\n",
    "    refine_optimal_cost(\"results3\",\"results_refined\",alg, dataset, f\"{alg.value}_{dataset.value}\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "subspace",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.8"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "3f94b3ea8b08becd1766c9f3a63569b610389292faace546d6478dffb17875b9"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
