{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "ce63a777",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import pickle5\n",
    "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"MIG-6abedaa4-16cd-51b2-9b2f-043073ed897a\"\n",
    "\n",
    "from model.ours3 import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "398c7676",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run configuration: every setting for this experiment is collected in the\n",
    "# `options` dict built below.\n",
    "options = {}\n",
    "\n",
    "# basic setting\n",
    "options['use_cuda'] = True\n",
    "options['vocab_dir'] = '../MINERVA/datasets/data_preprocessed/FB15K-237/vocab/'\n",
    "options['data_input_dir'] = '../MINERVA/datasets/data_preprocessed/FB15K-237/'\n",
    "# The device string is derived from the use_cuda flag above.\n",
    "options['device'] = 'cuda' if options['use_cuda'] else 'cpu'\n",
    "# Read both vocabularies through context managers so the file handles are\n",
    "# closed as soon as parsing finishes (the previous json.load(open(...)) form\n",
    "# never closed them). os.path.join avoids the doubled '/' that plain string\n",
    "# concatenation produced, because vocab_dir already ends with a slash.\n",
    "# NOTE(review): `json` is not imported in this cell; presumably it arrives via\n",
    "# `from model.ours3 import *` in the first cell - confirm or import explicitly.\n",
    "with open(os.path.join(options['vocab_dir'], 'relation_vocab.json'), encoding='utf-8') as vocab_file:\n",
    "    options['relation_vocab'] = json.load(vocab_file)\n",
    "with open(os.path.join(options['vocab_dir'], 'entity_vocab.json'), encoding='utf-8') as vocab_file:\n",
    "    options['entity_vocab'] = json.load(vocab_file)\n",
    "# model_dir and output_dir deliberately point at the same directory here.\n",
    "options['model_dir'] = './outputs_FB15K-237-1/'\n",
    "options['output_dir'] = './outputs_FB15K-237-1/'\n",
    "\n",
    "# agent setting\n",
    "# Empty dicts: no pretrained relation/entity embeddings are supplied.\n",
    "options['pretrained_embeddings_relation'] = {}\n",
    "options['pretrained_embeddings_entity'] = {}\n",
    "options['embedding_size'] = 50\n",
    "options['hidden_size'] = 200\n",
    "# The three flags below are 0/1 integers rather than booleans.\n",
    "options['use_entity_embeddings'] = 1\n",
    "options['train_entity_embeddings'] = 1\n",
    "options['train_relation_embeddings'] = 1\n",
    "options['path_length'] = 3\n",
    "options['LSTM_layers'] = 1\n",
    "options['max_num_actions'] = 100\n",
    "options['gnn_layer'] = 2\n",
    "\n",
    "# hyperparameters\n",
    "options['test_rollouts'] = 100\n",
    "options['num_rollouts'] = 20\n",
    "options['batch_size'] = 8\n",
    "options['eval_batch_size'] = 12\n",
    "options['beta'] = 0.08\n",
    "options['Lambda'] = 0.08\n",
    "options['gamma'] = 1\n",
    "options['positive_reward'] = 1\n",
    "options['negative_reward'] = 0\n",
    "options['learning_rate'] = 5e-5\n",
    "options['grad_clip_norm'] = 100\n",
    "# The recorded training log shows an 'Eval:' block after every 100 iterations,\n",
    "# which matches this value.\n",
    "options['eval_every'] = 100\n",
    "options['total_iterations'] = 20000\n",
    "options['pool'] = 'max'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9d9b9fda",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "cuda\n",
      "Reading vocab...\n",
      "batcher loaded\n",
      "KG constructed\n",
      "Reading vocab...\n",
      "batcher loaded\n",
      "KG constructed\n",
      "Reading vocab...\n",
      "batcher loaded\n",
      "KG constructed\n",
      "Agent start learning ...\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/root/miniconda3/lib/python3.10/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='none' instead.\n",
      "  warnings.warn(warning.format(ret))\n",
      "/root/Research/GraphRL/experiments/model/ours3.py:334: UserWarning: Implicit dimension choice for log_softmax has been deprecated. Change the call to include dim=X as an argument.\n",
      "  return loss, new_state, F.log_softmax(scores), label_action, chosen_relation\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Iteration: 10, Train loss: -0.3329, rewards: 0.0187\n",
      "Iteration: 20, Train loss: -0.3269, rewards: 0.0156\n",
      "Iteration: 30, Train loss: -0.3508, rewards: 0.0119\n",
      "Iteration: 40, Train loss: -0.3074, rewards: 0.0094\n",
      "Iteration: 50, Train loss: -0.3169, rewards: 0.0100\n",
      "Iteration: 60, Train loss: -0.3887, rewards: 0.0306\n",
      "Iteration: 70, Train loss: -0.3905, rewards: 0.0281\n",
      "Iteration: 80, Train loss: -0.3674, rewards: 0.0281\n",
      "Iteration: 90, Train loss: -0.3566, rewards: 0.0187\n",
      "Iteration: 100, Train loss: -0.3205, rewards: 0.0163\n",
      "Eval:\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/root/Research/GraphRL/experiments/model/ours3.py:636: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').\n",
      "  y = idx // self.max_num_actions\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Hits@1: 0.0070, Hits@3: 0.0241, Hits@10: 0.0626, MRR: 0.0256\n",
      "------------------------------------------------------------\n",
      "Iteration: 110, Train loss: -0.3501, rewards: 0.0288\n",
      "Iteration: 120, Train loss: -0.3860, rewards: 0.0262\n",
      "Iteration: 130, Train loss: -0.3356, rewards: 0.0219\n",
      "Iteration: 140, Train loss: -0.3972, rewards: 0.0356\n",
      "Iteration: 150, Train loss: -0.3903, rewards: 0.0269\n",
      "Iteration: 160, Train loss: -0.3506, rewards: 0.0194\n",
      "Iteration: 170, Train loss: -0.4426, rewards: 0.0462\n",
      "Iteration: 180, Train loss: -0.3138, rewards: 0.0081\n",
      "Iteration: 190, Train loss: -0.3682, rewards: 0.0144\n",
      "Iteration: 200, Train loss: -0.3862, rewards: 0.0256\n",
      "Eval:\n",
      "Hits@1: 0.0297, Hits@3: 0.0604, Hits@10: 0.1304, MRR: 0.0600\n",
      "------------------------------------------------------------\n",
      "Iteration: 210, Train loss: -0.3456, rewards: 0.0225\n",
      "Iteration: 220, Train loss: -0.3692, rewards: 0.0231\n",
      "Iteration: 230, Train loss: -0.3334, rewards: 0.0187\n",
      "Iteration: 240, Train loss: -0.3171, rewards: 0.0094\n",
      "Iteration: 250, Train loss: -0.3209, rewards: 0.0094\n",
      "Iteration: 260, Train loss: -0.3528, rewards: 0.0194\n",
      "Iteration: 270, Train loss: -0.3517, rewards: 0.0112\n",
      "Iteration: 280, Train loss: -0.3428, rewards: 0.0200\n",
      "Iteration: 290, Train loss: -0.4032, rewards: 0.0231\n",
      "Iteration: 300, Train loss: -0.3950, rewards: 0.0256\n",
      "Eval:\n",
      "Hits@1: 0.0267, Hits@3: 0.0585, Hits@10: 0.1321, MRR: 0.0594\n",
      "------------------------------------------------------------\n",
      "Iteration: 310, Train loss: -0.3478, rewards: 0.0206\n",
      "Iteration: 320, Train loss: -0.3728, rewards: 0.0194\n",
      "Iteration: 330, Train loss: -0.4234, rewards: 0.0494\n",
      "Iteration: 340, Train loss: -0.3877, rewards: 0.0288\n",
      "Iteration: 350, Train loss: -0.4256, rewards: 0.0356\n",
      "Iteration: 360, Train loss: -0.3753, rewards: 0.0294\n",
      "Iteration: 370, Train loss: -0.4176, rewards: 0.0306\n",
      "Iteration: 380, Train loss: -0.4213, rewards: 0.0275\n",
      "Iteration: 390, Train loss: -0.3725, rewards: 0.0300\n",
      "Iteration: 400, Train loss: -0.4315, rewards: 0.0338\n",
      "Eval:\n",
      "Hits@1: 0.0306, Hits@3: 0.0735, Hits@10: 0.1599, MRR: 0.0702\n",
      "------------------------------------------------------------\n",
      "Iteration: 410, Train loss: -0.4315, rewards: 0.0187\n",
      "Iteration: 420, Train loss: -0.5265, rewards: 0.0663\n",
      "Iteration: 430, Train loss: -0.4391, rewards: 0.0381\n",
      "Iteration: 440, Train loss: -0.4519, rewards: 0.0556\n",
      "Iteration: 450, Train loss: -0.5291, rewards: 0.0587\n",
      "Iteration: 460, Train loss: -0.3987, rewards: 0.0294\n",
      "Iteration: 470, Train loss: -0.4404, rewards: 0.0362\n",
      "Iteration: 480, Train loss: -0.4684, rewards: 0.0563\n",
      "Iteration: 490, Train loss: -0.4365, rewards: 0.0381\n",
      "Iteration: 500, Train loss: -0.4049, rewards: 0.0394\n",
      "Eval:\n",
      "Hits@1: 0.0293, Hits@3: 0.0675, Hits@10: 0.1453, MRR: 0.0657\n",
      "------------------------------------------------------------\n",
      "Iteration: 510, Train loss: -0.4094, rewards: 0.0294\n",
      "Iteration: 520, Train loss: -0.4871, rewards: 0.0550\n",
      "Iteration: 530, Train loss: -0.3785, rewards: 0.0331\n",
      "Iteration: 540, Train loss: -0.4385, rewards: 0.0262\n",
      "Iteration: 550, Train loss: -0.3708, rewards: 0.0362\n",
      "Iteration: 560, Train loss: -0.4990, rewards: 0.0406\n",
      "Iteration: 570, Train loss: -0.4643, rewards: 0.0500\n",
      "Iteration: 580, Train loss: -0.4126, rewards: 0.0288\n",
      "Iteration: 590, Train loss: -0.4273, rewards: 0.0294\n",
      "Iteration: 600, Train loss: -0.4256, rewards: 0.0350\n",
      "Eval:\n",
      "Hits@1: 0.0280, Hits@3: 0.0641, Hits@10: 0.1352, MRR: 0.0616\n",
      "------------------------------------------------------------\n",
      "Iteration: 610, Train loss: -0.4474, rewards: 0.0187\n",
      "Iteration: 620, Train loss: -0.4417, rewards: 0.0213\n",
      "Iteration: 630, Train loss: -0.3988, rewards: 0.0338\n",
      "Iteration: 640, Train loss: -0.4200, rewards: 0.0338\n",
      "Iteration: 650, Train loss: -0.3764, rewards: 0.0250\n",
      "Iteration: 660, Train loss: -0.4338, rewards: 0.0325\n",
      "Iteration: 670, Train loss: -0.4085, rewards: 0.0356\n",
      "Iteration: 680, Train loss: -0.4920, rewards: 0.0387\n",
      "Iteration: 690, Train loss: -0.4167, rewards: 0.0325\n",
      "Iteration: 700, Train loss: -0.4603, rewards: 0.0425\n",
      "Eval:\n",
      "Hits@1: 0.0572, Hits@3: 0.1145, Hits@10: 0.2168, MRR: 0.1060\n",
      "------------------------------------------------------------\n",
      "Iteration: 710, Train loss: -0.4689, rewards: 0.0537\n",
      "Iteration: 720, Train loss: -0.3796, rewards: 0.0213\n",
      "Iteration: 730, Train loss: -0.3506, rewards: 0.0175\n",
      "Iteration: 740, Train loss: -0.3776, rewards: 0.0288\n",
      "Iteration: 750, Train loss: -0.5573, rewards: 0.0794\n",
      "Iteration: 760, Train loss: -0.4217, rewards: 0.0294\n",
      "Iteration: 770, Train loss: -0.3825, rewards: 0.0344\n",
      "Iteration: 780, Train loss: -0.4943, rewards: 0.0594\n",
      "Iteration: 790, Train loss: -0.3601, rewards: 0.0225\n",
      "Iteration: 800, Train loss: -0.4073, rewards: 0.0344\n",
      "Eval:\n",
      "Hits@1: 0.0669, Hits@3: 0.1236, Hits@10: 0.2235, MRR: 0.1140\n",
      "------------------------------------------------------------\n",
      "Iteration: 810, Train loss: -0.4255, rewards: 0.0569\n",
      "Iteration: 820, Train loss: -0.3715, rewards: 0.0288\n",
      "Iteration: 830, Train loss: -0.5042, rewards: 0.0638\n",
      "Iteration: 840, Train loss: -0.4444, rewards: 0.0369\n",
      "Iteration: 850, Train loss: -0.4060, rewards: 0.0275\n",
      "Iteration: 860, Train loss: -0.4264, rewards: 0.0556\n",
      "Iteration: 870, Train loss: -0.4910, rewards: 0.0688\n",
      "Iteration: 880, Train loss: -0.4408, rewards: 0.0688\n",
      "Iteration: 890, Train loss: -0.4167, rewards: 0.0525\n",
      "Iteration: 900, Train loss: -0.4776, rewards: 0.0694\n",
      "Eval:\n",
      "Hits@1: 0.0977, Hits@3: 0.1744, Hits@10: 0.2647, MRR: 0.1523\n",
      "------------------------------------------------------------\n",
      "Iteration: 910, Train loss: -0.4433, rewards: 0.0669\n",
      "Iteration: 920, Train loss: -0.4479, rewards: 0.0650\n",
      "Iteration: 930, Train loss: -0.5406, rewards: 0.0688\n",
      "Iteration: 940, Train loss: -0.4606, rewards: 0.0556\n",
      "Iteration: 950, Train loss: -0.4932, rewards: 0.0656\n",
      "Iteration: 960, Train loss: -0.4599, rewards: 0.0638\n",
      "Iteration: 970, Train loss: -0.4429, rewards: 0.0525\n",
      "Iteration: 980, Train loss: -0.5488, rewards: 0.0531\n",
      "Iteration: 990, Train loss: -0.5090, rewards: 0.1044\n",
      "Iteration: 1000, Train loss: -0.4388, rewards: 0.0469\n",
      "Eval:\n",
      "Hits@1: 0.1333, Hits@3: 0.2051, Hits@10: 0.2912, MRR: 0.1843\n",
      "------------------------------------------------------------\n",
      "Iteration: 1010, Train loss: -0.6214, rewards: 0.1125\n",
      "Iteration: 1020, Train loss: -0.4716, rewards: 0.0619\n",
      "Iteration: 1030, Train loss: -0.5542, rewards: 0.0969\n",
      "Iteration: 1040, Train loss: -0.5384, rewards: 0.0969\n",
      "Iteration: 1050, Train loss: -0.6817, rewards: 0.1050\n",
      "Iteration: 1060, Train loss: -0.5648, rewards: 0.1019\n",
      "Iteration: 1070, Train loss: -0.5075, rewards: 0.0750\n",
      "Iteration: 1080, Train loss: -0.5521, rewards: 0.0775\n",
      "Iteration: 1090, Train loss: -0.5223, rewards: 0.1044\n",
      "Iteration: 1100, Train loss: -0.4874, rewards: 0.0963\n",
      "Eval:\n",
      "Hits@1: 0.1428, Hits@3: 0.2188, Hits@10: 0.3017, MRR: 0.1956\n",
      "------------------------------------------------------------\n",
      "Iteration: 1110, Train loss: -0.4969, rewards: 0.0675\n",
      "Iteration: 1120, Train loss: -0.5678, rewards: 0.0769\n",
      "Iteration: 1130, Train loss: -0.4827, rewards: 0.0663\n",
      "Iteration: 1140, Train loss: -0.5169, rewards: 0.1031\n",
      "Iteration: 1150, Train loss: -0.5955, rewards: 0.0862\n",
      "Iteration: 1160, Train loss: -0.6196, rewards: 0.1181\n",
      "Iteration: 1170, Train loss: -0.5854, rewards: 0.1006\n",
      "Iteration: 1180, Train loss: -0.5363, rewards: 0.0988\n",
      "Iteration: 1190, Train loss: -0.5890, rewards: 0.0906\n",
      "Iteration: 1200, Train loss: -0.4643, rewards: 0.0600\n",
      "Eval:\n",
      "Hits@1: 0.1420, Hits@3: 0.2250, Hits@10: 0.3135, MRR: 0.1993\n",
      "------------------------------------------------------------\n",
      "Iteration: 1210, Train loss: -0.4634, rewards: 0.0656\n",
      "Iteration: 1220, Train loss: -0.6128, rewards: 0.0869\n",
      "Iteration: 1230, Train loss: -0.6625, rewards: 0.1500\n",
      "Iteration: 1240, Train loss: -0.6316, rewards: 0.1288\n",
      "Iteration: 1250, Train loss: -0.6256, rewards: 0.1175\n",
      "Iteration: 1260, Train loss: -0.5631, rewards: 0.0712\n",
      "Iteration: 1270, Train loss: -0.5528, rewards: 0.1156\n",
      "Iteration: 1280, Train loss: -0.5070, rewards: 0.0975\n",
      "Iteration: 1290, Train loss: -0.5790, rewards: 0.0881\n",
      "Iteration: 1300, Train loss: -0.6072, rewards: 0.1013\n",
      "Eval:\n",
      "Hits@1: 0.1689, Hits@3: 0.2457, Hits@10: 0.3349, MRR: 0.2232\n",
      "------------------------------------------------------------\n",
      "Iteration: 1310, Train loss: -0.4513, rewards: 0.0594\n",
      "Iteration: 1320, Train loss: -0.5589, rewards: 0.0838\n",
      "Iteration: 1330, Train loss: -0.6218, rewards: 0.1319\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Iteration: 1340, Train loss: -0.5042, rewards: 0.0725\n",
      "Iteration: 1350, Train loss: -0.6258, rewards: 0.0856\n",
      "Iteration: 1360, Train loss: -0.5725, rewards: 0.1000\n",
      "Iteration: 1370, Train loss: -0.6344, rewards: 0.1244\n",
      "Iteration: 1380, Train loss: -0.5173, rewards: 0.0825\n",
      "Iteration: 1390, Train loss: -0.5608, rewards: 0.1212\n",
      "Iteration: 1400, Train loss: -0.6314, rewards: 0.1187\n",
      "Eval:\n",
      "Hits@1: 0.1666, Hits@3: 0.2445, Hits@10: 0.3381, MRR: 0.2224\n",
      "------------------------------------------------------------\n",
      "Iteration: 1410, Train loss: -0.5897, rewards: 0.1087\n",
      "Iteration: 1420, Train loss: -0.6046, rewards: 0.0906\n",
      "Iteration: 1430, Train loss: -0.6673, rewards: 0.1250\n",
      "Iteration: 1440, Train loss: -0.7164, rewards: 0.1250\n",
      "Iteration: 1450, Train loss: -0.5179, rewards: 0.0781\n",
      "Iteration: 1460, Train loss: -0.5542, rewards: 0.0931\n",
      "Iteration: 1470, Train loss: -0.5906, rewards: 0.0838\n",
      "Iteration: 1480, Train loss: -0.6060, rewards: 0.1269\n",
      "Iteration: 1490, Train loss: -0.6165, rewards: 0.1069\n",
      "Iteration: 1500, Train loss: -0.6484, rewards: 0.1256\n",
      "Eval:\n",
      "Hits@1: 0.1784, Hits@3: 0.2573, Hits@10: 0.3490, MRR: 0.2335\n",
      "------------------------------------------------------------\n",
      "Iteration: 1510, Train loss: -0.6691, rewards: 0.1031\n",
      "Iteration: 1520, Train loss: -0.5954, rewards: 0.0956\n",
      "Iteration: 1530, Train loss: -0.5878, rewards: 0.0988\n",
      "Iteration: 1540, Train loss: -0.8107, rewards: 0.1363\n",
      "Iteration: 1550, Train loss: -0.5069, rewards: 0.0919\n",
      "Iteration: 1560, Train loss: -0.5348, rewards: 0.1219\n",
      "Iteration: 1570, Train loss: -0.6342, rewards: 0.0944\n",
      "Iteration: 1580, Train loss: -0.7007, rewards: 0.1350\n",
      "Iteration: 1590, Train loss: -0.5711, rewards: 0.1013\n",
      "Iteration: 1600, Train loss: -0.6062, rewards: 0.1019\n",
      "Eval:\n",
      "Hits@1: 0.1764, Hits@3: 0.2618, Hits@10: 0.3545, MRR: 0.2344\n",
      "------------------------------------------------------------\n",
      "Iteration: 1610, Train loss: -0.5633, rewards: 0.0838\n",
      "Iteration: 1620, Train loss: -0.5493, rewards: 0.1437\n",
      "Iteration: 1630, Train loss: -0.5925, rewards: 0.0956\n",
      "Iteration: 1640, Train loss: -0.6653, rewards: 0.1431\n",
      "Iteration: 1650, Train loss: -0.5983, rewards: 0.1144\n",
      "Iteration: 1660, Train loss: -0.6377, rewards: 0.1100\n",
      "Iteration: 1670, Train loss: -0.5220, rewards: 0.0887\n",
      "Iteration: 1680, Train loss: -0.5591, rewards: 0.0838\n",
      "Iteration: 1690, Train loss: -0.6328, rewards: 0.1181\n",
      "Iteration: 1700, Train loss: -0.5341, rewards: 0.1006\n",
      "Eval:\n",
      "Hits@1: 0.1862, Hits@3: 0.2681, Hits@10: 0.3648, MRR: 0.2442\n",
      "------------------------------------------------------------\n",
      "Iteration: 1710, Train loss: -0.6815, rewards: 0.1119\n",
      "Iteration: 1720, Train loss: -0.5921, rewards: 0.0900\n",
      "Iteration: 1730, Train loss: -0.5463, rewards: 0.0906\n",
      "Iteration: 1740, Train loss: -0.5138, rewards: 0.1194\n",
      "Iteration: 1750, Train loss: -0.6033, rewards: 0.1113\n",
      "Iteration: 1760, Train loss: -0.6302, rewards: 0.1431\n",
      "Iteration: 1770, Train loss: -0.6052, rewards: 0.1369\n",
      "Iteration: 1780, Train loss: -0.6947, rewards: 0.1531\n",
      "Iteration: 1790, Train loss: -0.4274, rewards: 0.0619\n",
      "Iteration: 1800, Train loss: -0.6608, rewards: 0.1237\n",
      "Eval:\n",
      "Hits@1: 0.1849, Hits@3: 0.2650, Hits@10: 0.3629, MRR: 0.2426\n",
      "------------------------------------------------------------\n",
      "Iteration: 1810, Train loss: -0.6900, rewards: 0.1212\n",
      "Iteration: 1820, Train loss: -0.5642, rewards: 0.1075\n",
      "Iteration: 1830, Train loss: -0.6165, rewards: 0.1037\n",
      "Iteration: 1840, Train loss: -0.6383, rewards: 0.1725\n",
      "Iteration: 1850, Train loss: -0.6507, rewards: 0.0894\n",
      "Iteration: 1860, Train loss: -0.5594, rewards: 0.1113\n",
      "Iteration: 1870, Train loss: -0.6331, rewards: 0.1062\n",
      "Iteration: 1880, Train loss: -0.5742, rewards: 0.1044\n",
      "Iteration: 1890, Train loss: -0.5559, rewards: 0.0900\n",
      "Iteration: 1900, Train loss: -0.6764, rewards: 0.1113\n",
      "Eval:\n",
      "Hits@1: 0.1824, Hits@3: 0.2555, Hits@10: 0.3512, MRR: 0.2366\n",
      "------------------------------------------------------------\n",
      "Iteration: 1910, Train loss: -0.6175, rewards: 0.0775\n",
      "Iteration: 1920, Train loss: -0.5752, rewards: 0.0894\n",
      "Iteration: 1930, Train loss: -0.6396, rewards: 0.1150\n",
      "Iteration: 1940, Train loss: -0.6095, rewards: 0.1250\n",
      "Iteration: 1950, Train loss: -0.5695, rewards: 0.0794\n",
      "Iteration: 1960, Train loss: -0.6413, rewards: 0.1163\n",
      "Iteration: 1970, Train loss: -0.5959, rewards: 0.0975\n",
      "Iteration: 1980, Train loss: -0.6931, rewards: 0.1119\n",
      "Iteration: 1990, Train loss: -0.4217, rewards: 0.0600\n",
      "Iteration: 2000, Train loss: -0.6426, rewards: 0.1394\n",
      "Eval:\n",
      "Hits@1: 0.1872, Hits@3: 0.2710, Hits@10: 0.3636, MRR: 0.2452\n",
      "------------------------------------------------------------\n",
      "Iteration: 2010, Train loss: -0.6882, rewards: 0.1094\n",
      "Iteration: 2020, Train loss: -0.5145, rewards: 0.0794\n",
      "Iteration: 2030, Train loss: -0.6457, rewards: 0.1494\n",
      "Iteration: 2040, Train loss: -0.6853, rewards: 0.1081\n",
      "Iteration: 2050, Train loss: -0.6542, rewards: 0.1113\n",
      "Iteration: 2060, Train loss: -0.7378, rewards: 0.1456\n",
      "Iteration: 2070, Train loss: -0.5217, rewards: 0.0644\n",
      "Iteration: 2080, Train loss: -0.5657, rewards: 0.1106\n",
      "Iteration: 2090, Train loss: -0.6483, rewards: 0.0963\n",
      "Iteration: 2100, Train loss: -0.6091, rewards: 0.1225\n",
      "Eval:\n",
      "Hits@1: 0.1861, Hits@3: 0.2718, Hits@10: 0.3665, MRR: 0.2454\n",
      "------------------------------------------------------------\n",
      "Iteration: 2110, Train loss: -0.6142, rewards: 0.1138\n",
      "Iteration: 2120, Train loss: -0.5938, rewards: 0.1194\n",
      "Iteration: 2130, Train loss: -0.8442, rewards: 0.1681\n",
      "Iteration: 2140, Train loss: -0.6877, rewards: 0.1600\n",
      "Iteration: 2150, Train loss: -0.6599, rewards: 0.1550\n",
      "Iteration: 2160, Train loss: -0.5834, rewards: 0.0881\n",
      "Iteration: 2170, Train loss: -0.6462, rewards: 0.1025\n",
      "Iteration: 2180, Train loss: -0.7475, rewards: 0.2081\n",
      "Iteration: 2190, Train loss: -0.6472, rewards: 0.1250\n",
      "Iteration: 2200, Train loss: -0.6370, rewards: 0.0887\n",
      "Eval:\n",
      "Hits@1: 0.1908, Hits@3: 0.2731, Hits@10: 0.3665, MRR: 0.2482\n",
      "------------------------------------------------------------\n",
      "Iteration: 2210, Train loss: -0.6060, rewards: 0.1519\n",
      "Iteration: 2220, Train loss: -0.6718, rewards: 0.1194\n",
      "Iteration: 2230, Train loss: -0.6987, rewards: 0.1700\n",
      "Iteration: 2240, Train loss: -0.6821, rewards: 0.1350\n",
      "Iteration: 2250, Train loss: -0.7391, rewards: 0.1394\n",
      "Iteration: 2260, Train loss: -0.6002, rewards: 0.1194\n",
      "Iteration: 2270, Train loss: -0.5925, rewards: 0.1256\n",
      "Iteration: 2280, Train loss: -0.6041, rewards: 0.1187\n",
      "Iteration: 2290, Train loss: -0.7441, rewards: 0.1431\n",
      "Iteration: 2300, Train loss: -0.7250, rewards: 0.1163\n",
      "Eval:\n",
      "Hits@1: 0.1979, Hits@3: 0.2813, Hits@10: 0.3730, MRR: 0.2554\n",
      "------------------------------------------------------------\n",
      "Iteration: 2310, Train loss: -0.6291, rewards: 0.1331\n",
      "Iteration: 2320, Train loss: -0.7253, rewards: 0.1388\n",
      "Iteration: 2330, Train loss: -0.6102, rewards: 0.1131\n",
      "Iteration: 2340, Train loss: -0.5954, rewards: 0.0862\n",
      "Iteration: 2350, Train loss: -0.5608, rewards: 0.1250\n",
      "Iteration: 2360, Train loss: -0.6319, rewards: 0.0862\n",
      "Iteration: 2370, Train loss: -0.5617, rewards: 0.1025\n",
      "Iteration: 2380, Train loss: -0.5871, rewards: 0.1056\n",
      "Iteration: 2390, Train loss: -0.6712, rewards: 0.1425\n",
      "Iteration: 2400, Train loss: -0.6637, rewards: 0.1237\n",
      "Eval:\n",
      "Hits@1: 0.1978, Hits@3: 0.2813, Hits@10: 0.3809, MRR: 0.2564\n",
      "------------------------------------------------------------\n",
      "Iteration: 2410, Train loss: -0.5383, rewards: 0.1106\n",
      "Iteration: 2420, Train loss: -0.6150, rewards: 0.1169\n",
      "Iteration: 2430, Train loss: -0.5715, rewards: 0.1006\n",
      "Iteration: 2440, Train loss: -0.6656, rewards: 0.1281\n",
      "Iteration: 2450, Train loss: -0.7835, rewards: 0.2213\n",
      "Iteration: 2460, Train loss: -0.6854, rewards: 0.1338\n",
      "Iteration: 2470, Train loss: -0.5730, rewards: 0.1144\n",
      "Iteration: 2480, Train loss: -0.5348, rewards: 0.0869\n",
      "Iteration: 2490, Train loss: -0.6954, rewards: 0.1450\n",
      "Iteration: 2500, Train loss: -0.6776, rewards: 0.1375\n",
      "Eval:\n",
      "Hits@1: 0.2059, Hits@3: 0.2948, Hits@10: 0.3889, MRR: 0.2661\n",
      "------------------------------------------------------------\n",
      "Iteration: 2510, Train loss: -0.7768, rewards: 0.1638\n",
      "Iteration: 2520, Train loss: -0.7435, rewards: 0.1525\n",
      "Iteration: 2530, Train loss: -0.8599, rewards: 0.1562\n",
      "Iteration: 2540, Train loss: -0.7743, rewards: 0.1375\n",
      "Iteration: 2550, Train loss: -0.8057, rewards: 0.1494\n",
      "Iteration: 2560, Train loss: -0.6106, rewards: 0.1250\n",
      "Iteration: 2570, Train loss: -0.7461, rewards: 0.1588\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Iteration: 2580, Train loss: -0.7246, rewards: 0.1225\n",
      "Iteration: 2590, Train loss: -0.6552, rewards: 0.1450\n",
      "Iteration: 2600, Train loss: -0.6620, rewards: 0.1381\n",
      "Eval:\n",
      "Hits@1: 0.1946, Hits@3: 0.2859, Hits@10: 0.3801, MRR: 0.2555\n",
      "------------------------------------------------------------\n",
      "Iteration: 2610, Train loss: -0.7028, rewards: 0.1369\n",
      "Iteration: 2620, Train loss: -0.6940, rewards: 0.1469\n",
      "Iteration: 2630, Train loss: -0.6559, rewards: 0.1344\n",
      "Iteration: 2640, Train loss: -0.7483, rewards: 0.1619\n",
      "Iteration: 2650, Train loss: -0.6146, rewards: 0.1044\n",
      "Iteration: 2660, Train loss: -0.7660, rewards: 0.2062\n",
      "Iteration: 2670, Train loss: -0.7859, rewards: 0.1869\n",
      "Iteration: 2680, Train loss: -0.6420, rewards: 0.1219\n",
      "Iteration: 2690, Train loss: -0.7270, rewards: 0.1256\n",
      "Iteration: 2700, Train loss: -0.6739, rewards: 0.1400\n",
      "Eval:\n",
      "Hits@1: 0.2081, Hits@3: 0.2984, Hits@10: 0.3982, MRR: 0.2701\n",
      "------------------------------------------------------------\n",
      "Iteration: 2710, Train loss: -0.8116, rewards: 0.1675\n",
      "Iteration: 2720, Train loss: -0.7987, rewards: 0.1744\n",
      "Iteration: 2730, Train loss: -0.6645, rewards: 0.1181\n",
      "Iteration: 2740, Train loss: -0.5664, rewards: 0.1419\n",
      "Iteration: 2750, Train loss: -0.7461, rewards: 0.1194\n",
      "Iteration: 2760, Train loss: -0.6290, rewards: 0.1325\n",
      "Iteration: 2770, Train loss: -0.6972, rewards: 0.1212\n",
      "Iteration: 2780, Train loss: -0.7192, rewards: 0.1412\n",
      "Iteration: 2790, Train loss: -0.6628, rewards: 0.1013\n",
      "Iteration: 2800, Train loss: -0.5919, rewards: 0.0856\n",
      "Eval:\n",
      "Hits@1: 0.1997, Hits@3: 0.2823, Hits@10: 0.3803, MRR: 0.2580\n",
      "------------------------------------------------------------\n",
      "Iteration: 2810, Train loss: -0.7518, rewards: 0.1369\n",
      "Iteration: 2820, Train loss: -0.7003, rewards: 0.1531\n",
      "Iteration: 2830, Train loss: -0.6439, rewards: 0.1169\n",
      "Iteration: 2840, Train loss: -0.5635, rewards: 0.0700\n",
      "Iteration: 2850, Train loss: -0.6055, rewards: 0.1363\n",
      "Iteration: 2860, Train loss: -0.6230, rewards: 0.1481\n",
      "Iteration: 2870, Train loss: -0.6566, rewards: 0.1400\n",
      "Iteration: 2880, Train loss: -0.5924, rewards: 0.1544\n",
      "Iteration: 2890, Train loss: -0.6927, rewards: 0.1350\n",
      "Iteration: 2900, Train loss: -0.7313, rewards: 0.1625\n",
      "Eval:\n",
      "Hits@1: 0.2113, Hits@3: 0.3001, Hits@10: 0.3979, MRR: 0.2722\n",
      "------------------------------------------------------------\n",
      "Iteration: 2910, Train loss: -0.6581, rewards: 0.1250\n",
      "Iteration: 2920, Train loss: -0.8741, rewards: 0.1675\n",
      "Iteration: 2930, Train loss: -0.6057, rewards: 0.1400\n",
      "Iteration: 2940, Train loss: -0.6271, rewards: 0.1094\n",
      "Iteration: 2950, Train loss: -0.8336, rewards: 0.1469\n",
      "Iteration: 2960, Train loss: -0.7414, rewards: 0.1775\n",
      "Iteration: 2970, Train loss: -0.6949, rewards: 0.1744\n",
      "Iteration: 2980, Train loss: -0.7029, rewards: 0.1331\n",
      "Iteration: 2990, Train loss: -0.7397, rewards: 0.1369\n",
      "Iteration: 3000, Train loss: -0.7354, rewards: 0.2000\n",
      "Eval:\n",
      "Hits@1: 0.2121, Hits@3: 0.2996, Hits@10: 0.3994, MRR: 0.2730\n",
      "------------------------------------------------------------\n",
      "Iteration: 3010, Train loss: -0.5485, rewards: 0.0906\n",
      "Iteration: 3020, Train loss: -0.6570, rewards: 0.1663\n",
      "Iteration: 3030, Train loss: -0.6927, rewards: 0.1550\n",
      "Iteration: 3040, Train loss: -0.6532, rewards: 0.1400\n",
      "Iteration: 3050, Train loss: -0.6954, rewards: 0.1388\n",
      "Iteration: 3060, Train loss: -0.6699, rewards: 0.1144\n",
      "Iteration: 3070, Train loss: -0.6125, rewards: 0.1144\n",
      "Iteration: 3080, Train loss: -0.6131, rewards: 0.1062\n",
      "Iteration: 3090, Train loss: -0.6590, rewards: 0.1381\n",
      "Iteration: 3100, Train loss: -0.7059, rewards: 0.1288\n",
      "Eval:\n",
      "Hits@1: 0.2129, Hits@3: 0.3026, Hits@10: 0.4035, MRR: 0.2754\n",
      "------------------------------------------------------------\n",
      "Iteration: 3110, Train loss: -0.6845, rewards: 0.1281\n",
      "Iteration: 3120, Train loss: -0.8363, rewards: 0.1975\n",
      "Iteration: 3130, Train loss: -0.7530, rewards: 0.1800\n",
      "Iteration: 3140, Train loss: -0.7083, rewards: 0.1425\n",
      "Iteration: 3150, Train loss: -0.6200, rewards: 0.1400\n",
      "Iteration: 3160, Train loss: -0.7207, rewards: 0.1487\n",
      "Iteration: 3170, Train loss: -0.7420, rewards: 0.1419\n",
      "Iteration: 3180, Train loss: -0.7122, rewards: 0.1456\n",
      "Iteration: 3190, Train loss: -0.7202, rewards: 0.1450\n",
      "Iteration: 3200, Train loss: -0.6441, rewards: 0.0912\n",
      "Eval:\n",
      "Hits@1: 0.2166, Hits@3: 0.3066, Hits@10: 0.4087, MRR: 0.2790\n",
      "------------------------------------------------------------\n",
      "Iteration: 3210, Train loss: -0.6633, rewards: 0.0938\n",
      "Iteration: 3220, Train loss: -0.8026, rewards: 0.1537\n",
      "Iteration: 3230, Train loss: -0.7008, rewards: 0.1706\n",
      "Iteration: 3240, Train loss: -0.7244, rewards: 0.1469\n",
      "Iteration: 3250, Train loss: -0.7171, rewards: 0.1187\n",
      "Iteration: 3260, Train loss: -0.6229, rewards: 0.1500\n",
      "Iteration: 3270, Train loss: -0.6253, rewards: 0.1231\n",
      "Iteration: 3280, Train loss: -0.6772, rewards: 0.1131\n",
      "Iteration: 3290, Train loss: -0.6418, rewards: 0.1219\n",
      "Iteration: 3300, Train loss: -0.6064, rewards: 0.1300\n",
      "Eval:\n",
      "Hits@1: 0.2156, Hits@3: 0.3055, Hits@10: 0.4056, MRR: 0.2769\n",
      "------------------------------------------------------------\n",
      "Iteration: 3310, Train loss: -0.7369, rewards: 0.1281\n",
      "Iteration: 3320, Train loss: -0.6728, rewards: 0.1044\n",
      "Iteration: 3330, Train loss: -0.7711, rewards: 0.1775\n",
      "Iteration: 3340, Train loss: -0.6785, rewards: 0.1481\n",
      "Iteration: 3350, Train loss: -0.6819, rewards: 0.1494\n",
      "Iteration: 3360, Train loss: -0.7849, rewards: 0.1700\n",
      "Iteration: 3370, Train loss: -0.7145, rewards: 0.1375\n",
      "Iteration: 3380, Train loss: -0.6439, rewards: 0.1588\n",
      "Iteration: 3390, Train loss: -0.7640, rewards: 0.1894\n",
      "Iteration: 3400, Train loss: -0.6067, rewards: 0.1775\n",
      "Eval:\n",
      "Hits@1: 0.2131, Hits@3: 0.3038, Hits@10: 0.4001, MRR: 0.2745\n",
      "------------------------------------------------------------\n",
      "Iteration: 3410, Train loss: -0.6600, rewards: 0.1306\n",
      "Iteration: 3420, Train loss: -0.7580, rewards: 0.1550\n",
      "Iteration: 3430, Train loss: -0.8008, rewards: 0.1469\n",
      "Iteration: 3440, Train loss: -0.7337, rewards: 0.1500\n",
      "Iteration: 3450, Train loss: -0.7038, rewards: 0.1381\n",
      "Iteration: 3460, Train loss: -0.7465, rewards: 0.1519\n",
      "Iteration: 3470, Train loss: -0.6921, rewards: 0.1225\n",
      "Iteration: 3480, Train loss: -0.6785, rewards: 0.1394\n",
      "Iteration: 3490, Train loss: -0.6665, rewards: 0.1794\n",
      "Iteration: 3500, Train loss: -0.7081, rewards: 0.1694\n",
      "Eval:\n",
      "Hits@1: 0.2227, Hits@3: 0.3085, Hits@10: 0.4042, MRR: 0.2819\n",
      "------------------------------------------------------------\n",
      "Iteration: 3510, Train loss: -0.7894, rewards: 0.1331\n",
      "Iteration: 3520, Train loss: -0.7460, rewards: 0.1500\n",
      "Iteration: 3530, Train loss: -0.6756, rewards: 0.1481\n",
      "Iteration: 3540, Train loss: -0.7999, rewards: 0.1725\n",
      "Iteration: 3550, Train loss: -0.7508, rewards: 0.1412\n",
      "Iteration: 3560, Train loss: -0.6520, rewards: 0.1569\n",
      "Iteration: 3570, Train loss: -0.7821, rewards: 0.1319\n",
      "Iteration: 3580, Train loss: -0.8787, rewards: 0.1931\n",
      "Iteration: 3590, Train loss: -0.5855, rewards: 0.1212\n",
      "Iteration: 3600, Train loss: -0.9498, rewards: 0.2075\n",
      "Eval:\n",
      "Hits@1: 0.2260, Hits@3: 0.3130, Hits@10: 0.4158, MRR: 0.2868\n",
      "------------------------------------------------------------\n",
      "Iteration: 3610, Train loss: -0.6598, rewards: 0.1331\n",
      "Iteration: 3620, Train loss: -0.8395, rewards: 0.1481\n",
      "Iteration: 3630, Train loss: -0.7135, rewards: 0.1750\n",
      "Iteration: 3640, Train loss: -0.6923, rewards: 0.1431\n",
      "Iteration: 3650, Train loss: -0.7689, rewards: 0.1412\n",
      "Iteration: 3660, Train loss: -0.7851, rewards: 0.1619\n",
      "Iteration: 3670, Train loss: -0.7165, rewards: 0.1613\n",
      "Iteration: 3680, Train loss: -0.8703, rewards: 0.1894\n",
      "Iteration: 3690, Train loss: -0.6139, rewards: 0.1100\n",
      "Iteration: 3700, Train loss: -0.6906, rewards: 0.1588\n",
      "Eval:\n",
      "Hits@1: 0.2131, Hits@3: 0.3073, Hits@10: 0.4047, MRR: 0.2762\n",
      "------------------------------------------------------------\n",
      "Iteration: 3710, Train loss: -0.7030, rewards: 0.1956\n",
      "Iteration: 3720, Train loss: -0.8659, rewards: 0.2256\n",
      "Iteration: 3730, Train loss: -0.7334, rewards: 0.1688\n",
      "Iteration: 3740, Train loss: -0.8043, rewards: 0.1938\n",
      "Iteration: 3750, Train loss: -0.6142, rewards: 0.1281\n",
      "Iteration: 3760, Train loss: -0.6402, rewards: 0.1319\n",
      "Iteration: 3770, Train loss: -0.6739, rewards: 0.1319\n",
      "Iteration: 3780, Train loss: -0.8788, rewards: 0.1819\n",
      "Iteration: 3790, Train loss: -0.5970, rewards: 0.1506\n",
      "Iteration: 3800, Train loss: -0.7626, rewards: 0.1981\n",
      "Eval:\n",
      "Hits@1: 0.2190, Hits@3: 0.3066, Hits@10: 0.4026, MRR: 0.2793\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "------------------------------------------------------------\n",
      "Iteration: 3810, Train loss: -0.7479, rewards: 0.1944\n",
      "Iteration: 3820, Train loss: -0.8122, rewards: 0.1781\n",
      "Iteration: 3830, Train loss: -0.6865, rewards: 0.2075\n",
      "Iteration: 3840, Train loss: -0.7729, rewards: 0.1994\n",
      "Iteration: 3850, Train loss: -0.7851, rewards: 0.1831\n",
      "Iteration: 3860, Train loss: -0.7091, rewards: 0.2037\n",
      "Iteration: 3870, Train loss: -0.8424, rewards: 0.2056\n",
      "Iteration: 3880, Train loss: -0.8119, rewards: 0.2213\n",
      "Iteration: 3890, Train loss: -0.7759, rewards: 0.1656\n",
      "Iteration: 3900, Train loss: -0.7741, rewards: 0.2031\n",
      "Eval:\n",
      "Hits@1: 0.2149, Hits@3: 0.3024, Hits@10: 0.3921, MRR: 0.2739\n",
      "------------------------------------------------------------\n",
      "Iteration: 3910, Train loss: -0.7160, rewards: 0.1719\n",
      "Iteration: 3920, Train loss: -0.6720, rewards: 0.1919\n",
      "Iteration: 3930, Train loss: -0.8181, rewards: 0.1938\n",
      "Iteration: 3940, Train loss: -0.8349, rewards: 0.1688\n",
      "Iteration: 3950, Train loss: -0.7044, rewards: 0.1256\n",
      "Iteration: 3960, Train loss: -0.6547, rewards: 0.1569\n",
      "Iteration: 3970, Train loss: -0.8544, rewards: 0.2094\n",
      "Iteration: 3980, Train loss: -0.8058, rewards: 0.1819\n",
      "Iteration: 3990, Train loss: -0.7426, rewards: 0.1550\n",
      "Iteration: 4000, Train loss: -0.6544, rewards: 0.1756\n",
      "Eval:\n",
      "Hits@1: 0.2187, Hits@3: 0.3037, Hits@10: 0.4008, MRR: 0.2777\n",
      "------------------------------------------------------------\n",
      "Iteration: 4010, Train loss: -0.7796, rewards: 0.1412\n",
      "Iteration: 4020, Train loss: -0.9203, rewards: 0.2112\n",
      "Iteration: 4030, Train loss: -0.7950, rewards: 0.1700\n",
      "Iteration: 4040, Train loss: -0.8140, rewards: 0.2269\n",
      "Iteration: 4050, Train loss: -0.7926, rewards: 0.1450\n",
      "Iteration: 4060, Train loss: -0.7392, rewards: 0.1631\n",
      "Iteration: 4070, Train loss: -0.8625, rewards: 0.2331\n",
      "Iteration: 4080, Train loss: -0.6515, rewards: 0.1281\n",
      "Iteration: 4090, Train loss: -0.7303, rewards: 0.1537\n",
      "Iteration: 4100, Train loss: -0.7339, rewards: 0.1756\n",
      "Eval:\n",
      "Hits@1: 0.2198, Hits@3: 0.3126, Hits@10: 0.4100, MRR: 0.2818\n",
      "------------------------------------------------------------\n",
      "Iteration: 4110, Train loss: -0.7082, rewards: 0.1431\n",
      "Iteration: 4120, Train loss: -0.8161, rewards: 0.1544\n",
      "Iteration: 4130, Train loss: -0.7924, rewards: 0.1656\n",
      "Iteration: 4140, Train loss: -0.7459, rewards: 0.1263\n",
      "Iteration: 4150, Train loss: -0.8355, rewards: 0.1994\n",
      "Iteration: 4160, Train loss: -0.6953, rewards: 0.1875\n",
      "Iteration: 4170, Train loss: -0.8854, rewards: 0.1919\n",
      "Iteration: 4180, Train loss: -0.6890, rewards: 0.1862\n",
      "Iteration: 4190, Train loss: -0.5868, rewards: 0.1138\n",
      "Iteration: 4200, Train loss: -0.9241, rewards: 0.1625\n",
      "Eval:\n",
      "Hits@1: 0.2153, Hits@3: 0.3074, Hits@10: 0.4045, MRR: 0.2775\n",
      "------------------------------------------------------------\n",
      "Iteration: 4210, Train loss: -0.9178, rewards: 0.1944\n",
      "Iteration: 4220, Train loss: -0.7825, rewards: 0.2181\n",
      "Iteration: 4230, Train loss: -0.7913, rewards: 0.1875\n",
      "Iteration: 4240, Train loss: -0.8244, rewards: 0.1769\n",
      "Iteration: 4250, Train loss: -0.5767, rewards: 0.1344\n",
      "Iteration: 4260, Train loss: -0.7387, rewards: 0.1363\n",
      "Iteration: 4270, Train loss: -0.8112, rewards: 0.2419\n",
      "Iteration: 4280, Train loss: -0.7079, rewards: 0.1369\n",
      "Iteration: 4290, Train loss: -0.7362, rewards: 0.1575\n",
      "Iteration: 4300, Train loss: -0.7265, rewards: 0.1325\n",
      "Eval:\n",
      "Hits@1: 0.2219, Hits@3: 0.3115, Hits@10: 0.4072, MRR: 0.2830\n",
      "------------------------------------------------------------\n",
      "Iteration: 4310, Train loss: -0.7412, rewards: 0.1713\n",
      "Iteration: 4320, Train loss: -0.6692, rewards: 0.1888\n",
      "Iteration: 4330, Train loss: -0.7126, rewards: 0.1856\n",
      "Iteration: 4340, Train loss: -0.7015, rewards: 0.1944\n",
      "Iteration: 4350, Train loss: -0.8394, rewards: 0.2037\n",
      "Iteration: 4360, Train loss: -0.7681, rewards: 0.1837\n",
      "Iteration: 4370, Train loss: -0.6990, rewards: 0.1519\n",
      "Iteration: 4380, Train loss: -0.9487, rewards: 0.2013\n",
      "Iteration: 4390, Train loss: -0.6119, rewards: 0.1169\n",
      "Iteration: 4400, Train loss: -0.6185, rewards: 0.1638\n",
      "Eval:\n",
      "Hits@1: 0.2210, Hits@3: 0.3158, Hits@10: 0.4117, MRR: 0.2840\n",
      "------------------------------------------------------------\n",
      "Iteration: 4410, Train loss: -0.5800, rewards: 0.0862\n",
      "Iteration: 4420, Train loss: -0.6627, rewards: 0.1256\n",
      "Iteration: 4430, Train loss: -0.8901, rewards: 0.1762\n",
      "Iteration: 4440, Train loss: -0.8130, rewards: 0.1837\n",
      "Iteration: 4450, Train loss: -0.5411, rewards: 0.1356\n",
      "Iteration: 4460, Train loss: -0.6444, rewards: 0.1269\n",
      "Iteration: 4470, Train loss: -0.9044, rewards: 0.2112\n",
      "Iteration: 4480, Train loss: -0.9387, rewards: 0.2306\n",
      "Iteration: 4490, Train loss: -0.8432, rewards: 0.2144\n",
      "Iteration: 4500, Train loss: -0.7601, rewards: 0.2369\n",
      "Eval:\n",
      "Hits@1: 0.2200, Hits@3: 0.3148, Hits@10: 0.4127, MRR: 0.2829\n",
      "------------------------------------------------------------\n",
      "Iteration: 4510, Train loss: -0.7331, rewards: 0.1769\n",
      "Iteration: 4520, Train loss: -0.8364, rewards: 0.1913\n",
      "Iteration: 4530, Train loss: -0.7822, rewards: 0.1700\n",
      "Iteration: 4540, Train loss: -0.7753, rewards: 0.1675\n",
      "Iteration: 4550, Train loss: -0.7211, rewards: 0.1244\n",
      "Iteration: 4560, Train loss: -0.7792, rewards: 0.1819\n",
      "Iteration: 4570, Train loss: -0.7426, rewards: 0.1850\n",
      "Iteration: 4580, Train loss: -0.7750, rewards: 0.1713\n",
      "Iteration: 4590, Train loss: -0.7847, rewards: 0.1794\n",
      "Iteration: 4600, Train loss: -0.7271, rewards: 0.1294\n",
      "Eval:\n",
      "Hits@1: 0.2302, Hits@3: 0.3178, Hits@10: 0.4137, MRR: 0.2896\n",
      "------------------------------------------------------------\n",
      "Iteration: 4610, Train loss: -0.6772, rewards: 0.1581\n",
      "Iteration: 4620, Train loss: -0.7879, rewards: 0.1806\n",
      "Iteration: 4630, Train loss: -0.5820, rewards: 0.1181\n",
      "Iteration: 4640, Train loss: -0.6589, rewards: 0.1319\n",
      "Iteration: 4650, Train loss: -0.6846, rewards: 0.1581\n",
      "Iteration: 4660, Train loss: -0.9346, rewards: 0.2150\n",
      "Iteration: 4670, Train loss: -0.9034, rewards: 0.2213\n",
      "Iteration: 4680, Train loss: -0.6746, rewards: 0.1519\n",
      "Iteration: 4690, Train loss: -0.7589, rewards: 0.1875\n",
      "Iteration: 4700, Train loss: -0.6486, rewards: 0.1694\n",
      "Eval:\n",
      "Hits@1: 0.2193, Hits@3: 0.3099, Hits@10: 0.4068, MRR: 0.2804\n",
      "------------------------------------------------------------\n",
      "Iteration: 4710, Train loss: -0.6945, rewards: 0.1925\n",
      "Iteration: 4720, Train loss: -0.8262, rewards: 0.1837\n",
      "Iteration: 4730, Train loss: -0.7751, rewards: 0.2662\n",
      "Iteration: 4750, Train loss: -0.9272, rewards: 0.1850\n",
      "Iteration: 4760, Train loss: -0.8430, rewards: 0.1831\n",
      "Iteration: 4770, Train loss: -0.9730, rewards: 0.1775\n",
      "Iteration: 4780, Train loss: -0.7835, rewards: 0.1344\n",
      "Iteration: 4790, Train loss: -0.8523, rewards: 0.2250\n",
      "Iteration: 4800, Train loss: -0.8322, rewards: 0.1475\n",
      "Eval:\n",
      "Hits@1: 0.2252, Hits@3: 0.3145, Hits@10: 0.4057, MRR: 0.2850\n",
      "------------------------------------------------------------\n",
      "Iteration: 4810, Train loss: -0.7140, rewards: 0.1400\n",
      "Iteration: 4820, Train loss: -0.8194, rewards: 0.2369\n",
      "Iteration: 4830, Train loss: -0.6814, rewards: 0.1400\n",
      "Iteration: 4840, Train loss: -0.9117, rewards: 0.2950\n",
      "Iteration: 4850, Train loss: -0.7513, rewards: 0.1769\n",
      "Iteration: 4860, Train loss: -0.6577, rewards: 0.1631\n",
      "Iteration: 4870, Train loss: -0.8036, rewards: 0.1625\n",
      "Iteration: 4880, Train loss: -0.7870, rewards: 0.1988\n",
      "Iteration: 4890, Train loss: -0.7569, rewards: 0.1444\n",
      "Iteration: 4900, Train loss: -0.6633, rewards: 0.1244\n",
      "Eval:\n",
      "Hits@1: 0.2253, Hits@3: 0.3187, Hits@10: 0.4140, MRR: 0.2874\n",
      "------------------------------------------------------------\n",
      "Iteration: 4910, Train loss: -0.7536, rewards: 0.1931\n",
      "Iteration: 4920, Train loss: -0.7787, rewards: 0.1375\n",
      "Iteration: 4930, Train loss: -0.6592, rewards: 0.1550\n",
      "Iteration: 4940, Train loss: -0.6184, rewards: 0.1256\n",
      "Iteration: 4950, Train loss: -0.8007, rewards: 0.1844\n",
      "Iteration: 4960, Train loss: -0.8611, rewards: 0.2381\n",
      "Iteration: 4970, Train loss: -0.8849, rewards: 0.1631\n",
      "Iteration: 4980, Train loss: -0.5945, rewards: 0.1125\n",
      "Iteration: 4990, Train loss: -0.7863, rewards: 0.1519\n",
      "Iteration: 5000, Train loss: -0.8495, rewards: 0.1806\n",
      "Eval:\n",
      "Hits@1: 0.2282, Hits@3: 0.3199, Hits@10: 0.4158, MRR: 0.2896\n",
      "------------------------------------------------------------\n",
      "Iteration: 5010, Train loss: -0.8562, rewards: 0.2531\n",
      "Iteration: 5020, Train loss: -0.6772, rewards: 0.1300\n",
      "Iteration: 5030, Train loss: -0.6066, rewards: 0.1044\n",
      "Iteration: 5040, Train loss: -0.8036, rewards: 0.1894\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Iteration: 5050, Train loss: -0.7782, rewards: 0.1638\n",
      "Iteration: 5060, Train loss: -0.7569, rewards: 0.1669\n",
      "Iteration: 5070, Train loss: -0.7073, rewards: 0.1531\n",
      "Iteration: 5080, Train loss: -0.7810, rewards: 0.1844\n",
      "Iteration: 5090, Train loss: -0.6700, rewards: 0.1269\n",
      "Iteration: 5100, Train loss: -0.8935, rewards: 0.2250\n",
      "Eval:\n",
      "Hits@1: 0.2360, Hits@3: 0.3286, Hits@10: 0.4195, MRR: 0.2970\n",
      "------------------------------------------------------------\n",
      "Iteration: 5110, Train loss: -0.7148, rewards: 0.1831\n",
      "Iteration: 5120, Train loss: -0.7113, rewards: 0.1275\n",
      "Iteration: 5130, Train loss: -0.7540, rewards: 0.1400\n",
      "Iteration: 5140, Train loss: -0.6728, rewards: 0.1531\n",
      "Iteration: 5150, Train loss: -0.8570, rewards: 0.1731\n",
      "Iteration: 5160, Train loss: -0.6934, rewards: 0.1731\n",
      "Iteration: 5170, Train loss: -0.7280, rewards: 0.1650\n",
      "Iteration: 5180, Train loss: -0.7226, rewards: 0.1512\n",
      "Iteration: 5190, Train loss: -0.6863, rewards: 0.1263\n",
      "Iteration: 5200, Train loss: -0.6902, rewards: 0.1475\n",
      "Eval:\n",
      "Hits@1: 0.2334, Hits@3: 0.3256, Hits@10: 0.4213, MRR: 0.2958\n",
      "------------------------------------------------------------\n",
      "Iteration: 5210, Train loss: -0.8743, rewards: 0.1787\n",
      "Iteration: 5220, Train loss: -0.6277, rewards: 0.1431\n",
      "Iteration: 5230, Train loss: -0.7272, rewards: 0.1300\n",
      "Iteration: 5240, Train loss: -0.7619, rewards: 0.1475\n",
      "Iteration: 5250, Train loss: -0.6768, rewards: 0.1300\n",
      "Iteration: 5260, Train loss: -0.5782, rewards: 0.1388\n",
      "Iteration: 5270, Train loss: -0.9058, rewards: 0.2075\n",
      "Iteration: 5280, Train loss: -0.7661, rewards: 0.1931\n",
      "Iteration: 5290, Train loss: -0.7837, rewards: 0.1769\n",
      "Iteration: 5300, Train loss: -0.7049, rewards: 0.1363\n",
      "Eval:\n",
      "Hits@1: 0.2325, Hits@3: 0.3257, Hits@10: 0.4210, MRR: 0.2951\n",
      "------------------------------------------------------------\n",
      "Iteration: 5310, Train loss: -0.7337, rewards: 0.1087\n",
      "Iteration: 5320, Train loss: -0.8267, rewards: 0.1650\n",
      "Iteration: 5330, Train loss: -0.7741, rewards: 0.1700\n",
      "Iteration: 5340, Train loss: -0.6573, rewards: 0.1244\n",
      "Iteration: 5350, Train loss: -0.7665, rewards: 0.1831\n",
      "Iteration: 5360, Train loss: -0.7445, rewards: 0.1544\n",
      "Iteration: 5370, Train loss: -0.8035, rewards: 0.2181\n",
      "Iteration: 5380, Train loss: -0.7028, rewards: 0.1444\n",
      "Iteration: 5390, Train loss: -0.7095, rewards: 0.1994\n",
      "Iteration: 5400, Train loss: -0.8186, rewards: 0.2456\n",
      "Eval:\n",
      "Hits@1: 0.2330, Hits@3: 0.3267, Hits@10: 0.4194, MRR: 0.2944\n",
      "------------------------------------------------------------\n",
      "Iteration: 5410, Train loss: -0.7571, rewards: 0.2094\n",
      "Iteration: 5420, Train loss: -0.7984, rewards: 0.1650\n",
      "Iteration: 5430, Train loss: -0.7347, rewards: 0.1969\n",
      "Iteration: 5440, Train loss: -0.8301, rewards: 0.2031\n",
      "Iteration: 5450, Train loss: -0.6704, rewards: 0.1544\n",
      "Iteration: 5460, Train loss: -0.7998, rewards: 0.1581\n",
      "Iteration: 5470, Train loss: -0.9036, rewards: 0.2306\n",
      "Iteration: 5480, Train loss: -0.7586, rewards: 0.2294\n",
      "Iteration: 5490, Train loss: -0.7494, rewards: 0.1531\n",
      "Iteration: 5500, Train loss: -0.8679, rewards: 0.1963\n",
      "Eval:\n",
      "Hits@1: 0.2351, Hits@3: 0.3269, Hits@10: 0.4195, MRR: 0.2964\n",
      "------------------------------------------------------------\n",
      "Iteration: 5510, Train loss: -0.8653, rewards: 0.1700\n",
      "Iteration: 5520, Train loss: -0.8159, rewards: 0.1900\n",
      "Iteration: 5530, Train loss: -0.7607, rewards: 0.2044\n",
      "Iteration: 5540, Train loss: -0.8077, rewards: 0.2412\n",
      "Iteration: 5550, Train loss: -0.8387, rewards: 0.1950\n",
      "Iteration: 5560, Train loss: -0.7488, rewards: 0.1481\n",
      "Iteration: 5570, Train loss: -0.8131, rewards: 0.1744\n",
      "Iteration: 5580, Train loss: -0.8531, rewards: 0.1594\n",
      "Iteration: 5590, Train loss: -0.8550, rewards: 0.1725\n",
      "Iteration: 5600, Train loss: -0.7447, rewards: 0.1500\n",
      "Eval:\n",
      "Hits@1: 0.2337, Hits@3: 0.3243, Hits@10: 0.4168, MRR: 0.2943\n",
      "------------------------------------------------------------\n",
      "Iteration: 5610, Train loss: -0.8405, rewards: 0.2200\n",
      "Iteration: 5620, Train loss: -0.7322, rewards: 0.2081\n",
      "Iteration: 5630, Train loss: -0.9097, rewards: 0.1931\n",
      "Iteration: 5640, Train loss: -0.8904, rewards: 0.2069\n",
      "Iteration: 5650, Train loss: -0.7017, rewards: 0.2006\n",
      "Iteration: 5660, Train loss: -0.8381, rewards: 0.2294\n",
      "Iteration: 5670, Train loss: -0.7633, rewards: 0.2013\n",
      "Iteration: 5680, Train loss: -0.8815, rewards: 0.2338\n",
      "Iteration: 5690, Train loss: -0.7857, rewards: 0.1781\n",
      "Iteration: 5700, Train loss: -0.7226, rewards: 0.1794\n",
      "Eval:\n",
      "Hits@1: 0.2340, Hits@3: 0.3228, Hits@10: 0.4121, MRR: 0.2931\n",
      "------------------------------------------------------------\n",
      "Iteration: 5710, Train loss: -0.6598, rewards: 0.1531\n",
      "Iteration: 5720, Train loss: -0.7716, rewards: 0.1794\n",
      "Iteration: 5730, Train loss: -0.6823, rewards: 0.1512\n",
      "Iteration: 5740, Train loss: -0.8667, rewards: 0.2175\n",
      "Iteration: 5750, Train loss: -0.7379, rewards: 0.1894\n",
      "Iteration: 5760, Train loss: -0.7841, rewards: 0.1975\n",
      "Iteration: 5770, Train loss: -0.8828, rewards: 0.2056\n",
      "Iteration: 5780, Train loss: -0.7366, rewards: 0.1400\n",
      "Iteration: 5790, Train loss: -0.7195, rewards: 0.1419\n",
      "Iteration: 5800, Train loss: -0.8815, rewards: 0.2087\n",
      "Eval:\n",
      "Hits@1: 0.2343, Hits@3: 0.3266, Hits@10: 0.4214, MRR: 0.2959\n",
      "------------------------------------------------------------\n",
      "Iteration: 5810, Train loss: -0.9481, rewards: 0.1975\n",
      "Iteration: 5820, Train loss: -0.7361, rewards: 0.1806\n",
      "Iteration: 5830, Train loss: -0.8875, rewards: 0.2444\n",
      "Iteration: 5840, Train loss: -0.9054, rewards: 0.2169\n",
      "Iteration: 5850, Train loss: -0.7913, rewards: 0.1894\n",
      "Iteration: 5860, Train loss: -0.8261, rewards: 0.2387\n",
      "Iteration: 5870, Train loss: -0.7795, rewards: 0.1913\n",
      "Iteration: 5880, Train loss: -0.6910, rewards: 0.1531\n",
      "Iteration: 5890, Train loss: -0.7323, rewards: 0.1044\n",
      "Iteration: 5900, Train loss: -0.6755, rewards: 0.1288\n",
      "Eval:\n",
      "Hits@1: 0.2275, Hits@3: 0.3130, Hits@10: 0.3989, MRR: 0.2849\n",
      "------------------------------------------------------------\n",
      "Iteration: 5910, Train loss: -1.0275, rewards: 0.2362\n",
      "Iteration: 5920, Train loss: -0.8650, rewards: 0.2456\n",
      "Iteration: 5930, Train loss: -0.8631, rewards: 0.2656\n",
      "Iteration: 5940, Train loss: -0.9108, rewards: 0.2144\n",
      "Iteration: 5950, Train loss: -0.7764, rewards: 0.1719\n",
      "Iteration: 5960, Train loss: -0.8006, rewards: 0.1956\n",
      "Iteration: 5970, Train loss: -0.6586, rewards: 0.1487\n",
      "Iteration: 5980, Train loss: -0.7846, rewards: 0.2181\n",
      "Iteration: 5990, Train loss: -0.7671, rewards: 0.1862\n",
      "Iteration: 6000, Train loss: -0.7546, rewards: 0.1756\n",
      "Eval:\n",
      "Hits@1: 0.2362, Hits@3: 0.3279, Hits@10: 0.4180, MRR: 0.2973\n",
      "------------------------------------------------------------\n",
      "Iteration: 6010, Train loss: -0.9197, rewards: 0.1994\n",
      "Iteration: 6020, Train loss: -0.7671, rewards: 0.1750\n",
      "Iteration: 6030, Train loss: -0.8831, rewards: 0.2062\n",
      "Iteration: 6040, Train loss: -0.7244, rewards: 0.2119\n",
      "Iteration: 6050, Train loss: -0.8849, rewards: 0.1963\n",
      "Iteration: 6060, Train loss: -0.6908, rewards: 0.1456\n",
      "Iteration: 6070, Train loss: -0.7066, rewards: 0.1462\n",
      "Iteration: 6080, Train loss: -0.7144, rewards: 0.1419\n",
      "Iteration: 6090, Train loss: -0.6847, rewards: 0.1825\n",
      "Iteration: 6100, Train loss: -0.8325, rewards: 0.1594\n",
      "Eval:\n",
      "Hits@1: 0.2387, Hits@3: 0.3333, Hits@10: 0.4183, MRR: 0.2999\n",
      "------------------------------------------------------------\n",
      "Iteration: 6110, Train loss: -0.8414, rewards: 0.2519\n",
      "Iteration: 6120, Train loss: -0.8498, rewards: 0.1881\n",
      "Iteration: 6130, Train loss: -0.8355, rewards: 0.1588\n",
      "Iteration: 6140, Train loss: -0.7807, rewards: 0.1975\n",
      "Iteration: 6150, Train loss: -0.8451, rewards: 0.2150\n",
      "Iteration: 6160, Train loss: -0.8938, rewards: 0.1875\n",
      "Iteration: 6170, Train loss: -0.7914, rewards: 0.1537\n",
      "Iteration: 6180, Train loss: -0.9589, rewards: 0.1875\n",
      "Iteration: 6190, Train loss: -0.9095, rewards: 0.1881\n",
      "Iteration: 6200, Train loss: -0.7859, rewards: 0.2300\n",
      "Eval:\n",
      "Hits@1: 0.2351, Hits@3: 0.3223, Hits@10: 0.4141, MRR: 0.2943\n",
      "------------------------------------------------------------\n",
      "Iteration: 6210, Train loss: -0.8194, rewards: 0.1681\n",
      "Iteration: 6220, Train loss: -0.7187, rewards: 0.3056\n",
      "Iteration: 6230, Train loss: -0.8954, rewards: 0.2969\n",
      "Iteration: 6240, Train loss: -1.0321, rewards: 0.3006\n",
      "Iteration: 6250, Train loss: -0.7135, rewards: 0.1775\n",
      "Iteration: 6260, Train loss: -0.7900, rewards: 0.2219\n",
      "Iteration: 6270, Train loss: -0.7869, rewards: 0.1594\n",
      "Iteration: 6280, Train loss: -0.6419, rewards: 0.1500\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Iteration: 6290, Train loss: -0.9029, rewards: 0.2525\n",
      "Iteration: 6300, Train loss: -0.9275, rewards: 0.2850\n",
      "Eval:\n"
     ]
    }
   ],
   "source": [
    "# Build a Trainer from the `options` dict configured earlier in the notebook and run\n",
    "# its training loop. In the recorded run this printed train loss / rewards every 10\n",
    "# iterations and an Eval block (Hits@1/3/10, MRR) every 100 iterations.\n",
    "trainer = Trainer(options)\n",
    "trainer.train()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d6a50465",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Construct a new Trainer from the same options used for training.\n",
    "trainer = Trainer(options)\n",
    "\n",
    "# Restore the saved agent weights. map_location remaps the stored tensors onto the\n",
    "# configured device, so a checkpoint written on one GPU still loads when this cell\n",
    "# runs on a different GPU or on CPU (options['device'] == 'cpu').\n",
    "checkpoint_path = os.path.join(options['model_dir'], 'agent.ckpt')\n",
    "agent_state = torch.load(checkpoint_path, map_location=options['device'])\n",
    "trainer.agent.load_state_dict(agent_state)\n",
    "trainer.agent.eval()  # put the agent in evaluation mode before testing\n",
    "\n",
    "# Evaluate against trainer.test_test_environment instead of the default\n",
    "# test_environment (presumably the held-out test split; confirm in model.ours3).\n",
    "trainer.test_environment = trainer.test_test_environment\n",
    "test_results = trainer.test(beam=True, print_paths=False, save_model=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cb5b6cee",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show the value returned by trainer.test(...) in the evaluation cell above.\n",
    "print(test_results)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
