{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import random\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.nn.functional as F\n",
    "import torch.optim as optim\n",
    "from torch.nn.functional import normalize\n",
    "from torch.utils.data import Dataset, DataLoader\n",
    "\n",
    "from knapsack_utils import *"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Data Generation\n",
    "\n",
    "We generate knapsack instances that are hard both computationally and for human solvers. Each instance is labelled with an optimal solution."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Number of instances to generate for each split.\n",
    "num_instances_train, num_instances_test = 10000, 1000\n",
    "\n",
    "# Knapsack problems with exactly 18 items (min == max) and weights and values between 5 and 250.\n",
    "min_items = max_items = 18\n",
    "min_weight, max_weight = 5, 250\n",
    "\n",
    "filename_trainset = 'trainset.json'\n",
    "filename_testset = 'testset.json'\n",
    "\n",
    "# Train split first, then test split; each call receives its own instances filename and solutions filename.\n",
    "for split_size, split_filename, split_solutions_filename in (\n",
    "    (num_instances_train, filename_trainset, 'train_solution_storage.json'),\n",
    "    (num_instances_test, filename_testset, 'test_solution_storage.json'),\n",
    "):\n",
    "    generate_knapsack_instances(\n",
    "        split_size, min_items, max_items, min_weight, max_weight, split_filename,\n",
    "        optima=True, solutions_filename=split_solutions_filename,\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "## We load the generated file and create a trainset out of it\n",
    "with open('trainset.json', 'r') as f:\n",
    "    instances = json.load(f)\n",
    "df_instances = pd.DataFrame(instances)\n",
    "\n",
    "# The next cell reads \"W\", \"weights\" and \"values\" from every row of `df`.\n",
    "# `df` used to be filled here through a lookup on a \"game_id\" column of `df` itself, but no cell\n",
    "# creates `df` before this point, so a fresh kernel stopped with a NameError. The testset cell reads\n",
    "# the same three fields straight from the generated file, so `df` is derived directly from the\n",
    "# generated instances here as well.\n",
    "required_columns = [\"W\", \"weights\", \"values\"]\n",
    "missing_columns = [column for column in required_columns if column not in df_instances.columns]\n",
    "if missing_columns:\n",
    "    raise KeyError(f\"trainset.json is missing the fields {missing_columns}\")\n",
    "\n",
    "# reset_index returns a new frame with a 0..n-1 index, which the `df.iloc[i]` lookups of the next cell rely on.\n",
    "df = df_instances.reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Turn every instance in `df` into one feature row plus the best solution as its label.\n",
    "X_train, Y_train = [], []\n",
    "\n",
    "for i in df.index:\n",
    "    instance = df.iloc[i]\n",
    "    capacity = instance[\"W\"]\n",
    "    item_weights = instance[\"weights\"]\n",
    "    item_values = instance[\"values\"]\n",
    "\n",
    "    # As commented in the paper we have 2*18+3 input features:\n",
    "    # W, the weights, the values, the summed weights and the summed values.\n",
    "    features = [capacity] + item_weights + item_values + [sum(item_weights), sum(item_values)]\n",
    "    X_train.append(features)\n",
    "\n",
    "    # The last argument (0) is the min value; increase it to save computation and\n",
    "    # exclude solutions that have less than that value.\n",
    "    candidates = knapsack_value_filter(capacity, item_weights, item_values, 0)\n",
    "    candidates.sort(reverse=True)\n",
    "    best_candidate = candidates[0]\n",
    "    Y_train.append(best_candidate[1])  # pick the best solution\n",
    "\n",
    "X_train = np.array(X_train)\n",
    "Y_train = np.array(Y_train)\n",
    "\n",
    "# The data we used for training is included in the folder \"ks_tasks\".\n",
    "for array_filename, array in (('X_train.npy', X_train), ('Y_train.npy', Y_train)):\n",
    "    np.save(array_filename, array)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "## generate the testset\n",
    "# We always tested our models on the same testset (located in ks_tasks/31012024_ks_tasks.json),\n",
    "# but we can also test on the generated testset from above.\n",
    "df = pd.read_json(filename_testset)\n",
    "\n",
    "X_test, Y_test = [], []\n",
    "\n",
    "for i in df.index:\n",
    "    task = df.iloc[i]\n",
    "    capacity, item_weights, item_values = task[\"W\"], task[\"weights\"], task[\"values\"]\n",
    "\n",
    "    # Feature row: W, the weights, the values, the summed weights and the summed values.\n",
    "    X_test.append([capacity] + item_weights + item_values + [sum(item_weights), sum(item_values)])\n",
    "\n",
    "    # Sort the candidate solutions in descending order and keep element [1] of the first one as the label.\n",
    "    candidates = knapsack_value_filter(capacity, item_weights, item_values, 0)\n",
    "    candidates.sort(reverse=True)\n",
    "    top_candidate = candidates[0]\n",
    "    Y_test.append(top_candidate[1])\n",
    "\n",
    "X_test = np.array(X_test)\n",
    "Y_test = np.array(Y_test)\n",
    "for array_filename, array in (('X_test.npy', X_test), ('Y_test.npy', Y_test)):\n",
    "    np.save(array_filename, array)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train and Test models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train = np.load('X_train.npy')\n",
    "Y_train = np.load('Y_train.npy')\n",
    "print(len(Y_train))\n",
    "sample_sizes = [100,500,1000] # training data sizes, we mainly manipulated this to vary performance\n",
    "repititions = 50 # how often we try in order to achieve the model performance we want to test\n",
    "test_means = []\n",
    "test_opts = []\n",
    "test_losses = []\n",
    "sols = []\n",
    "ns = []\n",
    "reps = []\n",
    "test_results = pd.DataFrame()\n",
    "filename_testset = 'testset.json' #e.g. ks_tasks.json\n",
    "with open(filename_testset, 'r') as f:\n",
    "    instances = json.load(f)\n",
    "    #instances = instances[:1000] exclude instances to speed up the process\n",
    "for _ in range(repititions):\n",
    "    for n in sample_sizes:\n",
    "        print(n)\n",
    "        sampled_indices = np.random.choice(len(X_train), size=n, replace=True) # sample from the pool\n",
    "        # Use the sampled indices to get the corresponding pairs of (x, y)\n",
    "        sampled_pairs = [(X_train[i], Y_train[i]) for i in sampled_indices]\n",
    "        X_sample, Y_sample = zip(*sampled_pairs)\n",
    "        train_data = Data(X_sample, Y_sample)\n",
    "        train_dataloader = DataLoader(dataset=train_data, batch_size=50, shuffle=True)\n",
    "        print(\"train start\")\n",
    "        model = train(train_dataloader,18,epochs = 10, lr=0.00005, step_size=6, gamma = 0.1) # we vary the epochs between 5 and 30 to achieve better/Worse performance\n",
    "        print(\"train end\")\n",
    "        torch.save(model, 'model_'+str(n)+'_'+str(_))##save the model\n",
    "        print(\"save_end\")\n",
    "       \n",
    "        model.eval()\n",
    "        test_mean, test_opt, test_loss, sol = test(instances, model, 18) # test the model on the testset and get mean utility values and loss\n",
    "        print(test_loss)\n",
    "        test_means.append(test_mean)\n",
    "        test_opts.append(test_opt)\n",
    "        test_losses.append(test_loss)\n",
    "        sols.append(sol)\n",
    "        ns.append(n)\n",
    "        reps.append(_)\n",
    "        test_results = pd.DataFrame()\n",
    "        test_results[\"test_means\"] = test_means\n",
    "        test_results[\"test_opts\"] = test_opts\n",
    "        test_results[\"test_loss\"] = test_losses\n",
    "        test_results[\"solutions\"] = sols\n",
    "        test_results[\"n\"] = ns\n",
    "        test_results[\"rep\"] = reps\n",
    "        test_results.to_csv('./model_evaluation.csv')# store the results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import ast\n",
    "\n",
    "filename_testset = 'testset.json'\n",
    "model_ids = range(1, 7)  # the selected models are stored as 'model_1' ... 'model_6'\n",
    "\n",
    "# Load the test instances first. Previously test() ran before this file was read, so the cell only\n",
    "# worked when `instances` was still left over from the training cell.\n",
    "with open(filename_testset, 'r') as f:\n",
    "    instances = json.load(f)\n",
    "\n",
    "# Load the selected models and get their solutions to the instances in the testset.\n",
    "# NOTE(review): torch.load unpickles a complete model object, which can execute arbitrary code, so\n",
    "# only load files you trust. Recent PyTorch releases default to weights_only=True and may refuse\n",
    "# such files; pass weights_only=False there if loading fails.\n",
    "solutions_by_model = {}\n",
    "for q in model_ids:\n",
    "    selected_model = torch.load('model_' + str(q))\n",
    "    # The training cell saves each model before calling model.eval(), so switch to eval mode here to\n",
    "    # evaluate under the same conditions as in that cell.\n",
    "    selected_model.eval()\n",
    "    # test() returns four values; only the last one (the solutions) is needed here.\n",
    "    _mean, _opt, _loss, solutions_by_model[q] = test(instances, selected_model, 18)\n",
    "\n",
    "# Walk through the instances together with the matching solution of every model. The old loop\n",
    "# unpacked only the solutions of q4-q6 but also used sol_q1-sol_q3, which raised a NameError.\n",
    "instances_new = []\n",
    "for instance, *model_solutions in zip(instances, *(solutions_by_model[q] for q in model_ids)):\n",
    "    for q, model_solution in zip(model_ids, model_solutions):\n",
    "        _indices, total_value = get_item_indices(model_solution, instance)\n",
    "        # store the recommendation value in the json for display in the webapp later\n",
    "        instance[\"recommendation_value_q\" + str(q)] = total_value\n",
    "    instances_new.append(instance)\n",
    "\n",
    "with open('ks_tasks_for_users.json', 'w') as f:\n",
    "    json.dump(instances_new, f)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
