{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def extract_rounded_cost(filename):\n",
    "    \"\"\"Return every value logged as 'the rounded cost is  <number>' in a file.\n",
    "\n",
    "    Args:\n",
    "        filename: Path of the text file to scan.\n",
    "\n",
    "    Returns:\n",
    "        list[float]: The matched numbers in order of appearance; an empty\n",
    "        list when the phrase never occurs.\n",
    "\n",
    "    Raises:\n",
    "        OSError: If the file cannot be opened.\n",
    "    \"\"\"\n",
    "    import re\n",
    "\n",
    "    # The pattern requires two spaces between the phrase and the number. The\n",
    "    # number may carry a sign and an exponent, so values such as 1.5e-05 or\n",
    "    # -0.25 are captured whole instead of being truncated or skipped.\n",
    "    pattern = r'the rounded cost is  ([-+]?\\d+(?:\\.\\d*)?(?:[eE][-+]?\\d+)?)'\n",
    "\n",
    "    with open(filename, 'r') as file:\n",
    "        content = file.read()\n",
    "\n",
    "    # With a single capturing group, findall yields the number strings directly.\n",
    "    return [float(number) for number in re.findall(pattern, content)]\n",
    "\n",
    "def extract_fractional_cost(filename):\n",
    "    \"\"\"Return every value logged as ' the violation is  <number>' in a file.\n",
    "\n",
    "    Despite the function name, the text matched is the phrase\n",
    "    ' the violation is  ' (with a leading space and two trailing spaces)\n",
    "    followed by a number.\n",
    "\n",
    "    Args:\n",
    "        filename: Path of the text file to scan.\n",
    "\n",
    "    Returns:\n",
    "        list[float]: The matched numbers in order of appearance; an empty\n",
    "        list when the phrase never occurs.\n",
    "\n",
    "    Raises:\n",
    "        OSError: If the file cannot be opened.\n",
    "    \"\"\"\n",
    "    import re\n",
    "\n",
    "    # The number may carry a sign and an exponent, so values such as 1.5e-05\n",
    "    # or -0.25 are captured whole instead of being truncated or skipped.\n",
    "    pattern = r' the violation is  ([-+]?\\d+(?:\\.\\d*)?(?:[eE][-+]?\\d+)?)'\n",
    "\n",
    "    with open(filename, 'r') as file:\n",
    "        content = file.read()\n",
    "\n",
    "    # With a single capturing group, findall yields the number strings directly.\n",
    "    return [float(number) for number in re.findall(pattern, content)]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# fractional_cost = np.array(extract_fractional_cost('bank.txt')).reshape([4, -1])\n",
    "# rounded_cost = np.array(extract_rounded_cost('output.txt')).reshape([2,-1])\n",
    "# fractional_cost"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'np' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[1], line 6\u001b[0m\n\u001b[1;32m      3\u001b[0m k_range \u001b[39m=\u001b[39m \u001b[39mrange\u001b[39m(\u001b[39m2\u001b[39m,\u001b[39m22\u001b[39m)\n\u001b[1;32m      5\u001b[0m \u001b[39m# Sample data for plotting\u001b[39;00m\n\u001b[0;32m----> 6\u001b[0m x \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39mlinspace(\u001b[39m0\u001b[39m, \u001b[39m10\u001b[39m, \u001b[39m100\u001b[39m)\n\u001b[1;32m      7\u001b[0m y1 \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39msin(x)\n\u001b[1;32m      8\u001b[0m y2 \u001b[39m=\u001b[39m np\u001b[39m.\u001b[39mcos(x)\n",
      "\u001b[0;31mNameError\u001b[0m: name 'np' is not defined"
     ]
    }
   ],
   "source": [
    "# Result logs to plot (one subplot each) and the title shown above each panel.\n",
    "filelist = ['output/bank.txt', 'output/adult.txt', 'output/creditcard.txt', 'output/census1990.txt']\n",
    "titles = ['bank',  'adult','creditcard', 'census']\n",
    "# x-axis values: k = 2, ..., 21.\n",
    "k_range = range(2,22)\n",
    "\n",
    "# One panel per result file. squeeze=False keeps the axes container 2-D for any\n",
    "# number of files, so its single row can always be taken as a 1-D sequence.\n",
    "fig, axs = plt.subplots(1, len(filelist), figsize=(20, 5), squeeze=False)\n",
    "axs = axs[0]\n",
    "\n",
    "for ax, path, title in zip(axs, filelist, titles):\n",
    "    # Consecutive values parsed from the log are paired into rows: column 0 is\n",
    "    # plotted as 'our' and column 1 as 'k-means'.\n",
    "    fractional_cost = np.array(extract_fractional_cost(path)).reshape([-1, 2])\n",
    "    if len(fractional_cost) != len(k_range):\n",
    "        # Fail with the offending file named rather than a generic shape error\n",
    "        # from the plot call.\n",
    "        raise ValueError(\n",
    "            f'{path}: expected {len(k_range)} value pairs (one per k), '\n",
    "            f'found {len(fractional_cost)}'\n",
    "        )\n",
    "\n",
    "    ax.plot(k_range, fractional_cost[:, 0], label='our', markerfacecolor='none', markersize=15, marker='o', linewidth=1)\n",
    "    ax.plot(k_range, fractional_cost[:, 1], label='k-means', markerfacecolor='none', markersize=15, marker='+', linewidth=1)\n",
    "\n",
    "    ax.set_title(title, fontsize=30, weight='bold')\n",
    "    ax.legend()\n",
    "    ax.grid(True)\n",
    "    for side in ('top', 'bottom', 'left', 'right'):\n",
    "        ax.spines[side].set_linewidth(3)\n",
    "    ax.tick_params(labelsize=20, axis='both', which='both', length=10, width=5, direction='out')\n",
    "    ax.set_xlabel(r'$k$', fontsize=30, weight='bold')\n",
    "\n",
    "# Only the leftmost panel carries the y-axis label; set it once, outside the loop.\n",
    "axs[0].set_ylabel('Cost', fontsize=30, weight='bold')\n",
    "\n",
    "# NOTE(review): tight_layout() below recomputes subplot spacing, so this manual\n",
    "# adjustment likely has no visible effect - confirm before removing it.\n",
    "fig.subplots_adjust(wspace=0.4,hspace=0.4)\n",
    "plt.tight_layout()\n",
    "plt.savefig('cost.eps')\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "ph",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.13"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
