{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "ae8b47d9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# !wget https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "4c096173",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"7\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b4facebb",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "4c3f89fa",
   "metadata": {},
   "outputs": [],
   "source": [
    "# # Download GloVe (one time, ~1 min)\n",
    "# ! wget http://nlp.stanford.edu/data/glove.6B.zip\n",
    "# ! unzip glove.6B.zip\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "4f573c80",
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"\n",
    "SQuAD Answer Generation with GloVe Embeddings + Q/K Hypothesis Testing\n",
    "\n",
    "EXPECTED PERFORMANCE:\n",
    "- With GloVe embeddings: 40-55% F1 ✓\n",
    "- Training time: ~40-50 minutes\n",
    "- Can reach 50%+ with Q/K hypothesis\n",
    "\"\"\"\n",
    "\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "from torch.utils.data import Dataset, DataLoader, Subset\n",
    "from transformers import GPT2Tokenizer\n",
    "import json\n",
    "from collections import Counter\n",
    "import string\n",
    "import re\n",
    "from tqdm import tqdm\n",
    "import numpy as np\n",
    "import os\n",
    "import urllib.request\n",
    "import zipfile\n",
    "\n",
    "# Configuration\n",
    "TEST_QK_HYPOTHESIS = True  # Set True after baseline completes\n",
    "QK_LR_MULTIPLIER = 20  # Q/K learning-rate multiplier (20x); applied outside this cell — TODO confirm\n",
    "\n",
    "# Optimized for GloVe embeddings\n",
    "D_MODEL = 300  # Match GloVe dimension exactly\n",
    "N_HEADS = 6  # MultiHeadAttention asserts D_MODEL % N_HEADS == 0 (300 / 6 = 50 per head)\n",
    "N_LAYERS = 6\n",
    "D_FF = 1200\n",
    "MAX_SEQ_LEN = 256\n",
    "MAX_ANSWER_LEN = 50\n",
    "DROPOUT = 0.2\n",
    "BATCH_SIZE = 32\n",
    "ACCUMULATION_STEPS = 2  # Effective batch: BATCH_SIZE * ACCUMULATION_STEPS = 64\n",
    "BASE_LR = 5e-5\n",
    "WARMUP_STEPS = 1000\n",
    "NUM_EPOCHS = 100\n",
    "GRAD_CLIP = 0.5\n",
    "WEIGHT_DECAY = 0.05\n",
    "LABEL_SMOOTHING = 0.1\n",
    "TRAIN_SUBSET_SIZE = 60000  # More data with GloVe\n",
    "VAL_SUBSET_SIZE = 10000\n",
    "\n",
    "\n",
    "def download_and_extract_glove():\n",
    "    \"\"\"Download and extract GloVe embeddings\"\"\"\n",
    "    glove_file = 'glove.6B.300d.txt'\n",
    "    \n",
    "    if os.path.exists(glove_file):\n",
    "        print(f\"✓ GloVe embeddings found: {glove_file}\")\n",
    "        return glove_file\n",
    "    \n",
    "    print(\"\\n\" + \"=\"*70)\n",
    "    print(\"DOWNLOADING GLOVE EMBEDDINGS\")\n",
    "    print(\"=\"*70)\n",
    "    \n",
    "    zip_file = 'glove.6B.zip'\n",
    "    \n",
    "    if not os.path.exists(zip_file):\n",
    "        print(\"Downloading GloVe 6B (822MB)... This may take a few minutes\")\n",
    "        url = 'https://huggingface.co/stanfordnlp/glove/resolve/main/glove.6B.zip'\n",
    "        \n",
    "        try:\n",
    "            # Download with progress bar\n",
    "            response = urllib.request.urlopen(url)\n",
    "            total_size = int(response.headers.get('content-length', 0))\n",
    "            \n",
    "            with open(zip_file, 'wb') as f, tqdm(\n",
    "                total=total_size, unit='B', unit_scale=True, desc='Downloading'\n",
    "            ) as pbar:\n",
    "                while True:\n",
    "                    chunk = response.read(8192)\n",
    "                    if not chunk:\n",
    "                        break\n",
    "                    f.write(chunk)\n",
    "                    pbar.update(len(chunk))\n",
    "            \n",
    "            print(\"✓ Download complete!\")\n",
    "        except Exception as e:\n",
    "            print(f\"Download failed: {e}\")\n",
    "            print(\"\\nAlternative: Download manually from:\")\n",
    "            print(\"  https://nlp.stanford.edu/projects/glove/\")\n",
    "            print(\"  or https://huggingface.co/stanfordnlp/glove\")\n",
    "            return None\n",
    "    \n",
    "    # Extract\n",
    "    if os.path.exists(zip_file):\n",
    "        print(\"Extracting GloVe embeddings...\")\n",
    "        with zipfile.ZipFile(zip_file, 'r') as zip_ref:\n",
    "            # Only extract the 300d file we need\n",
    "            zip_ref.extract('glove.6B.300d.txt')\n",
    "        print(\"✓ Extraction complete!\")\n",
    "        \n",
    "        # Optionally remove zip to save space\n",
    "        # os.remove(zip_file)\n",
    "    \n",
    "    if os.path.exists(glove_file):\n",
    "        return glove_file\n",
    "    else:\n",
    "        print(\"⚠ GloVe file not found after extraction\")\n",
    "        return None\n",
    "\n",
    "\n",
    "def load_glove_embeddings(glove_file, tokenizer, embedding_dim=300):\n",
    "    \"\"\"Load GloVe and create embedding matrix for GPT-2 tokenizer\"\"\"\n",
    "    print(\"\\n\" + \"=\"*70)\n",
    "    print(\"LOADING GLOVE EMBEDDINGS\")\n",
    "    print(\"=\"*70)\n",
    "    \n",
    "    # Load GloVe vectors\n",
    "    print(\"Reading GloVe file (this takes ~1 minute)...\")\n",
    "    glove_vectors = {}\n",
    "    \n",
    "    with open(glove_file, 'r', encoding='utf-8') as f:\n",
    "        for line in tqdm(f, total=400000, desc=\"Loading GloVe\"):\n",
    "            values = line.rstrip().split(' ')\n",
    "            word = values[0]\n",
    "            vector = np.asarray(values[1:], dtype='float32')\n",
    "            glove_vectors[word] = vector\n",
    "    \n",
    "    print(f\"✓ Loaded {len(glove_vectors):,} GloVe vectors\")\n",
    "    \n",
    "    # Create embedding matrix for tokenizer vocabulary\n",
    "    vocab_size = tokenizer.vocab_size\n",
    "    embedding_matrix = np.random.normal(0, 0.1, (vocab_size, embedding_dim)).astype('float32')\n",
    "    \n",
    "    # Match tokenizer vocab with GloVe\n",
    "    print(\"Matching tokenizer vocabulary with GloVe...\")\n",
    "    matched = 0\n",
    "    \n",
    "    for token, idx in tqdm(tokenizer.get_vocab().items(), desc=\"Matching\"):\n",
    "        # Try different matching strategies\n",
    "        token_clean = token.replace('Ġ', '').replace('Ċ', '').lower().strip()\n",
    "        \n",
    "        if token in glove_vectors:\n",
    "            embedding_matrix[idx] = glove_vectors[token]\n",
    "            matched += 1\n",
    "        elif token.lower() in glove_vectors:\n",
    "            embedding_matrix[idx] = glove_vectors[token.lower()]\n",
    "            matched += 1\n",
    "        elif token_clean in glove_vectors:\n",
    "            embedding_matrix[idx] = glove_vectors[token_clean]\n",
    "            matched += 1\n",
    "        # For subword tokens, try averaging character embeddings\n",
    "        elif len(token_clean) > 0 and all(c.isalpha() for c in token_clean):\n",
    "            # Use random but consistent embedding for unknown tokens\n",
    "            pass\n",
    "    \n",
    "    match_rate = 100 * matched / vocab_size\n",
    "    print(f\"✓ Matched {matched:,}/{vocab_size:,} tokens ({match_rate:.1f}%)\")\n",
    "    print(\"=\"*70 + \"\\n\")\n",
    "    \n",
    "    return torch.FloatTensor(embedding_matrix)\n",
    "\n",
    "\n",
    "class MultiHeadAttention(nn.Module):\n",
    "    def __init__(self, d_model, n_heads, dropout=0.1):\n",
    "        super().__init__()\n",
    "        assert d_model % n_heads == 0\n",
    "        self.d_k = d_model // n_heads\n",
    "        self.n_heads = n_heads\n",
    "        \n",
    "        self.q_linear = nn.Linear(d_model, d_model)\n",
    "        self.k_linear = nn.Linear(d_model, d_model)\n",
    "        self.v_linear = nn.Linear(d_model, d_model)\n",
    "        self.out = nn.Linear(d_model, d_model)\n",
    "        self.dropout = nn.Dropout(dropout)\n",
    "        self.last_attention_weights = None\n",
    "        \n",
    "    def forward(self, q, k, v, mask=None, save_attention=False):\n",
    "        bs = q.size(0)\n",
    "        \n",
    "        q = self.q_linear(q).view(bs, -1, self.n_heads, self.d_k).transpose(1, 2)\n",
    "        k = self.k_linear(k).view(bs, -1, self.n_heads, self.d_k).transpose(1, 2)\n",
    "        v = self.v_linear(v).view(bs, -1, self.n_heads, self.d_k).transpose(1, 2)\n",
    "        \n",
    "        scores = torch.matmul(q, k.transpose(-2, -1)) / (self.d_k ** 0.5)\n",
    "        \n",
    "        if mask is not None:\n",
    "            scores = scores.masked_fill(mask == 0, -1e9)\n",
    "        \n",
    "        attn = torch.softmax(scores, dim=-1)\n",
    "        if save_attention:\n",
    "            self.last_attention_weights = attn.detach()\n",
    "        \n",
    "        attn = self.dropout(attn)\n",
    "        context = torch.matmul(attn, v)\n",
    "        context = context.transpose(1, 2).contiguous().view(bs, -1, self.n_heads * self.d_k)\n",
    "        \n",
    "        return self.out(context)\n",
    "\n",
    "\n",
    "class DecoderLayer(nn.Module):\n",
    "    def __init__(self, d_model, n_heads, d_ff, dropout=0.1):\n",
    "        super().__init__()\n",
    "        self.self_attn = MultiHeadAttention(d_model, n_heads, dropout)\n",
    "        self.ff = nn.Sequential(\n",
    "            nn.Linear(d_model, d_ff),\n",
    "            nn.GELU(),\n",
    "            nn.Dropout(dropout),\n",
    "            nn.Linear(d_ff, d_model),\n",
    "            nn.Dropout(dropout)\n",
    "        )\n",
    "        self.norm1 = nn.LayerNorm(d_model)\n",
    "        self.norm2 = nn.LayerNorm(d_model)\n",
    "        \n",
    "    def forward(self, x, mask=None, save_attention=False):\n",
    "        # Pre-norm\n",
    "        attn_out = self.self_attn(self.norm1(x), self.norm1(x), self.norm1(x), mask, save_attention)\n",
    "        x = x + attn_out\n",
    "        x = x + self.ff(self.norm2(x))\n",
    "        return x\n",
    "\n",
    "\n",
    "class GPTAnswerGenerator(nn.Module):\n",
    "    def __init__(self, vocab_size, d_model, n_heads, n_layers, d_ff, max_seq_len, dropout=0.1, pretrained_embeddings=None):\n",
    "        super().__init__()\n",
    "        \n",
    "        self.token_embedding = nn.Embedding(vocab_size, d_model)\n",
    "        \n",
    "        # Initialize with pretrained embeddings if provided\n",
    "        if pretrained_embeddings is not None:\n",
    "            print(\"Initializing token embeddings with GloVe...\")\n",
    "            self.token_embedding.weight.data.copy_(pretrained_embeddings)\n",
    "            print(\"✓ Token embeddings initialized with GloVe\")\n",
    "        \n",
    "        self.position_embedding = nn.Embedding(max_seq_len, d_model)\n",
    "        self.emb_dropout = nn.Dropout(dropout)\n",
    "        \n",
    "        self.layers = nn.ModuleList([\n",
    "            DecoderLayer(d_model, n_heads, d_ff, dropout) for _ in range(n_layers)\n",
    "        ])\n",
    "        \n",
    "        self.norm = nn.LayerNorm(d_model)\n",
    "        self.output = nn.Linear(d_model, vocab_size)\n",
    "        \n",
    "        # Weight tying\n",
    "        self.output.weight = self.token_embedding.weight\n",
    "        \n",
    "        # Initialize non-embedding weights\n",
    "        self._init_weights()\n",
    "        \n",
    "    def _init_weights(self):\n",
    "        # Don't reinitialize token_embedding if using GloVe\n",
    "        for name, p in self.named_parameters():\n",
    "            if 'token_embedding' not in name and p.dim() > 1:\n",
    "                nn.init.xavier_uniform_(p, gain=1/np.sqrt(2))\n",
    "        \n",
    "    def forward(self, x, mask=None, save_attention=False):\n",
    "        pos = torch.arange(x.size(1), device=x.device).unsqueeze(0)\n",
    "        x = self.token_embedding(x) + self.position_embedding(pos)\n",
    "        x = self.emb_dropout(x)\n",
    "        \n",
    "        for layer in self.layers:\n",
    "            x = layer(x, mask, save_attention)\n",
    "        \n",
    "        return self.output(self.norm(x))\n",
    "    \n",
    "    def get_attention_weights(self):\n",
    "        return [layer.self_attn.last_attention_weights for layer in self.layers]\n",
    "\n",
    "\n",
    "class SQuADDataset(Dataset):\n",
    "    def __init__(self, data_path, tokenizer, max_len, max_ans_len):\n",
    "        self.tokenizer = tokenizer\n",
    "        self.max_len = max_len\n",
    "        self.max_ans_len = max_ans_len\n",
    "        self.data = []\n",
    "        \n",
    "        with open(data_path, 'r') as f:\n",
    "            squad = json.load(f)\n",
    "        \n",
    "        for article in squad['data']:\n",
    "            for para in article['paragraphs']:\n",
    "                ctx = para['context']\n",
    "                for qa in para['qas']:\n",
    "                    if not qa['is_impossible'] and qa['answers']:\n",
    "                        ans = qa['answers'][0]['text']\n",
    "                        ans_start = qa['answers'][0]['answer_start']\n",
    "                        \n",
    "                        # Extract relevant context window\n",
    "                        start = max(0, ans_start - 200)\n",
    "                        end = min(len(ctx), ans_start + len(ans) + 200)\n",
    "                        focused_ctx = ctx[start:end]\n",
    "                        \n",
    "                        self.data.append({\n",
    "                            'context': focused_ctx,\n",
    "                            'question': qa['question'],\n",
    "                            'answer': ans\n",
    "                        })\n",
    "    \n",
    "    def __len__(self):\n",
    "        return len(self.data)\n",
    "    \n",
    "    def __getitem__(self, idx):\n",
    "        item = self.data[idx]\n",
    "        \n",
    "        # Format: Q: question C: context A: answer\n",
    "        prefix = f\"Q: {item['question']} C: {item['context']} A:\"\n",
    "        answer = f\" {item['answer']}\"\n",
    "        \n",
    "        prefix_ids = self.tokenizer.encode(prefix, max_length=self.max_len-self.max_ans_len-2, \n",
    "                                          truncation=True, add_special_tokens=False)\n",
    "        answer_ids = self.tokenizer.encode(answer, max_length=self.max_ans_len, \n",
    "                                          truncation=True, add_special_tokens=False)\n",
    "        answer_ids.append(self.tokenizer.eos_token_id)\n",
    "        \n",
    "        input_ids = prefix_ids + answer_ids\n",
    "        labels = [-100] * len(prefix_ids) + answer_ids\n",
    "        \n",
    "        # Pad\n",
    "        while len(input_ids) < self.max_len:\n",
    "            input_ids.append(self.tokenizer.pad_token_id)\n",
    "            labels.append(-100)\n",
    "        \n",
    "        return {\n",
    "            'input_ids': torch.tensor(input_ids[:self.max_len]),\n",
    "            'labels': torch.tensor(labels[:self.max_len])\n",
    "        }\n",
    "\n",
    "\n",
    "def create_mask(seq_len, device):\n",
    "    return (torch.triu(torch.ones(seq_len, seq_len, device=device), 1) == 0).unsqueeze(0).unsqueeze(0)\n",
    "\n",
    "\n",
    "def normalize_answer(s):\n",
    "    s = s.lower()\n",
    "    s = re.sub(r'\\b(a|an|the)\\b', ' ', s)\n",
    "    s = ''.join(c for c in s if c not in string.punctuation)\n",
    "    return ' '.join(s.split())\n",
    "\n",
    "\n",
    "def f1_score(pred, truth):\n",
    "    pred_tok = normalize_answer(pred).split()\n",
    "    truth_tok = normalize_answer(truth).split()\n",
    "    \n",
    "    if not pred_tok or not truth_tok:\n",
    "        return int(pred_tok == truth_tok)\n",
    "    \n",
    "    common = Counter(pred_tok) & Counter(truth_tok)\n",
    "    if not common:\n",
    "        return 0\n",
    "    \n",
    "    prec = sum(common.values()) / len(pred_tok)\n",
    "    rec = sum(common.values()) / len(truth_tok)\n",
    "    return 2 * prec * rec / (prec + rec)\n",
    "\n",
    "\n",
    "def exact_match(pred, truth):\n",
    "    return int(normalize_answer(pred) == normalize_answer(truth))\n",
    "\n",
    "\n",
    "def train_epoch(model, loader, opt, sched, device, epoch):\n",
    "    model.train()\n",
    "    total_loss = 0\n",
    "    opt.zero_grad()\n",
    "    \n",
    "    pbar = tqdm(loader, desc=f\"Epoch {epoch}\")\n",
    "    for i, batch in enumerate(pbar):\n",
    "        inp = batch['input_ids'].to(device)\n",
    "        lbl = batch['labels'].to(device)\n",
    "        \n",
    "        mask = create_mask(inp.size(1), device)\n",
    "        logits = model(inp, mask)\n",
    "        \n",
    "        # Shift for next-token prediction\n",
    "        loss = nn.functional.cross_entropy(\n",
    "            logits[:, :-1].reshape(-1, logits.size(-1)),\n",
    "            lbl[:, 1:].reshape(-1),\n",
    "            ignore_index=-100,\n",
    "            label_smoothing=LABEL_SMOOTHING\n",
    "        )\n",
    "        \n",
    "        loss = loss / ACCUMULATION_STEPS\n",
    "        loss.backward()\n",
    "        \n",
    "        if (i + 1) % ACCUMULATION_STEPS == 0:\n",
    "            nn.utils.clip_grad_norm_(model.parameters(), GRAD_CLIP)\n",
    "            opt.step()\n",
    "            sched.step()\n",
    "            opt.zero_grad()\n",
    "        \n",
    "        total_loss += loss.item() * ACCUMULATION_STEPS\n",
    "        pbar.set_postfix({'loss': f'{loss.item() * ACCUMULATION_STEPS:.3f}'})\n",
    "    \n",
    "    return total_loss / len(loader)\n",
    "\n",
    "\n",
    "def generate(model, tokenizer, context, question, device, max_len=50):\n",
    "    model.eval()\n",
    "    \n",
    "    prompt = f\"Q: {question} C: {context} A:\"\n",
    "    ids = tokenizer.encode(prompt, max_length=MAX_SEQ_LEN-max_len-5, \n",
    "                          truncation=True, add_special_tokens=False, return_tensors='pt').to(device)\n",
    "    \n",
    "    start_len = ids.size(1)\n",
    "    \n",
    "    with torch.no_grad():\n",
    "        for _ in range(max_len):\n",
    "            if ids.size(1) >= MAX_SEQ_LEN:\n",
    "                break\n",
    "            \n",
    "            mask = create_mask(ids.size(1), device)\n",
    "            logits = model(ids, mask)\n",
    "            next_tok = logits[:, -1].argmax(-1, keepdim=True)\n",
    "            ids = torch.cat([ids, next_tok], 1)\n",
    "            \n",
    "            if next_tok.item() == tokenizer.eos_token_id:\n",
    "                break\n",
    "    \n",
    "    return tokenizer.decode(ids[0, start_len:], skip_special_tokens=True).strip()\n",
    "\n",
    "\n",
    "def evaluate(model, dataset, tokenizer, device, n_samples=300):\n",
    "    model.eval()\n",
    "    f1_sum = em_sum = 0\n",
    "    \n",
    "    if isinstance(dataset, Subset):\n",
    "        items = [dataset.dataset.data[dataset.indices[i]] for i in range(min(n_samples, len(dataset)))]\n",
    "    else:\n",
    "        items = dataset.data[:n_samples]\n",
    "    \n",
    "    for item in tqdm(items, desc=\"Eval\"):\n",
    "        pred = generate(model, tokenizer, item['context'], item['question'], device)\n",
    "        f1_sum += f1_score(pred, item['answer'])\n",
    "        em_sum += exact_match(pred, item['answer'])\n",
    "    \n",
    "    return {'f1': f1_sum / len(items), 'em': em_sum / len(items)}\n",
    "\n",
    "\n",
    "def analyze_attention(model, dataset, tokenizer, device, n=30):\n",
    "    model.eval()\n",
    "    scores = []\n",
    "    \n",
    "    if isinstance(dataset, Subset):\n",
    "        items = [dataset.dataset.data[dataset.indices[i]] for i in range(min(n, len(dataset)))]\n",
    "    else:\n",
    "        items = dataset.data[:n]\n",
    "    \n",
    "    for item in items:\n",
    "        prompt = f\"Q: {item['question']} C: {item['context']} A:\"\n",
    "        ids = tokenizer.encode(prompt, max_length=MAX_SEQ_LEN-MAX_ANSWER_LEN, \n",
    "                              truncation=True, add_special_tokens=False, return_tensors='pt').to(device)\n",
    "        \n",
    "        with torch.no_grad():\n",
    "            mask = create_mask(ids.size(1), device)\n",
    "            model(ids, mask, save_attention=True)\n",
    "            \n",
    "            weights = model.get_attention_weights()\n",
    "            if weights[0] is not None:\n",
    "                avg = torch.stack([w[0] for w in weights if w is not None]).mean(0)\n",
    "                scores.append(avg[0].mean().item())\n",
    "    \n",
    "    return np.mean(scores) if scores else 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "13b236a4",
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "======================================================================\n",
      "SQUAD ANSWER GENERATION WITH GLOVE EMBEDDINGS\n",
      "======================================================================\n",
      "Model: 6L, 300d, 6h\n",
      "Device: cuda\n",
      "======================================================================\n",
      "\n",
      "✓ GloVe embeddings found: glove.6B.300d.txt\n",
      "Loading tokenizer...\n",
      "\n",
      "======================================================================\n",
      "LOADING GLOVE EMBEDDINGS\n",
      "======================================================================\n",
      "Reading GloVe file (this takes ~1 minute)...\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Loading GloVe: 100%|█████████████████| 400000/400000 [00:19<00:00, 20156.51it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "✓ Loaded 400,000 GloVe vectors\n",
      "Matching tokenizer vocabulary with GloVe...\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Matching: 100%|███████████████████████| 50257/50257 [00:00<00:00, 368110.81it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "✓ Matched 43,058/50,257 tokens (85.7%)\n",
      "======================================================================\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading datasets...\n",
      "Train: 60000, Val: 5928\n",
      "\n",
      "Initializing model...\n",
      "Training for seed 1234\n",
      "Initializing token embeddings with GloVe...\n",
      "✓ Token embeddings initialized with GloVe\n",
      "Total parameters: 21.7M\n",
      "Trainable parameters: 21.7M\n",
      "\n",
      "======================================================================\n",
      "TESTING Q/K HYPOTHESIS - Q/K LR = 20x\n",
      "======================================================================\n",
      "\n",
      "Q/K params: 1.1M\n",
      "Other params: 5.5M\n",
      "\n",
      "\n",
      "======================================================================\n",
      "EPOCH 1/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 1: 100%|██████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=8.156]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 11.5413\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 27.16it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 27.03it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.0000 | Val F1: 0.0033 | Gap: -0.0033 | EM: 0.0033\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 12\n",
      "  F1: 0.000\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.0033\n",
      "\n",
      "======================================================================\n",
      "EPOCH 2/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 2: 100%|██████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=7.039]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 7.5533\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:20<00:00,  9.84it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:38<00:00,  7.89it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.0126 | Val F1: 0.0224 | Gap: -0.0098 | EM: 0.0067\n",
      "✓ SAVED! Best F1: 0.0224\n",
      "\n",
      "======================================================================\n",
      "EPOCH 3/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 3: 100%|██████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=7.182]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 7.0917\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:19<00:00, 10.50it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:32<00:00,  9.27it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.0386 | Val F1: 0.0291 | Gap: 0.0094 | EM: 0.0033\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: three\n",
      "  F1: 0.000\n",
      "✓ SAVED! Best F1: 0.0291\n",
      "\n",
      "======================================================================\n",
      "EPOCH 4/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 4: 100%|██████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=6.255]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 6.8343\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:16<00:00, 11.92it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:26<00:00, 11.40it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.0582 | Val F1: 0.0391 | Gap: 0.0191 | EM: 0.0033\n",
      "✓ SAVED! Best F1: 0.0391\n",
      "\n",
      "======================================================================\n",
      "EPOCH 5/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 5: 100%|██████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=6.902]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 6.6468\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:18<00:00, 10.83it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:27<00:00, 10.96it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.0780 | Val F1: 0.0545 | Gap: 0.0235 | EM: 0.0067\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 3\n",
      "  F1: 0.000\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.0545\n",
      "\n",
      "======================================================================\n",
      "EPOCH 6/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 6: 100%|██████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=6.739]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 6.4910\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:15<00:00, 13.14it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:28<00:00, 10.53it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.0849 | Val F1: 0.0692 | Gap: 0.0157 | EM: 0.0133\n",
      "✓ SAVED! Best F1: 0.0692\n",
      "\n",
      "======================================================================\n",
      "EPOCH 7/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 7: 100%|██████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=6.319]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 6.3653\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:14<00:00, 13.69it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:27<00:00, 10.95it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1106 | Val F1: 0.0825 | Gap: 0.0281 | EM: 0.0300\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 3\n",
      "  F1: 0.000\n",
      "✓ SAVED! Best F1: 0.0825\n",
      "\n",
      "======================================================================\n",
      "EPOCH 8/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 8: 100%|██████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=6.177]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 6.2601\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:17<00:00, 11.33it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:31<00:00,  9.43it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1363 | Val F1: 0.0890 | Gap: 0.0473 | EM: 0.0333\n",
      "✓ SAVED! Best F1: 0.0890\n",
      "\n",
      "======================================================================\n",
      "EPOCH 9/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 9: 100%|██████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=6.451]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 6.1596\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:17<00:00, 11.15it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:25<00:00, 11.90it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1194 | Val F1: 0.1084 | Gap: 0.0110 | EM: 0.0467\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,\n",
      "  F1: 0.000\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.1084\n",
      "\n",
      "======================================================================\n",
      "EPOCH 10/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 10: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=5.838]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 6.0693\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:18<00:00, 10.72it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:26<00:00, 11.47it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1046 | Val F1: 0.1182 | Gap: -0.0136 | EM: 0.0533\n",
      "✓ SAVED! Best F1: 0.1182\n",
      "\n",
      "======================================================================\n",
      "EPOCH 11/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 11: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=6.033]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.9899\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:19<00:00, 10.02it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:32<00:00,  9.30it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1225 | Val F1: 0.1201 | Gap: 0.0024 | EM: 0.0567\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.\n",
      "  F1: 0.000\n",
      "✓ SAVED! Best F1: 0.1201\n",
      "\n",
      "======================================================================\n",
      "EPOCH 12/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 12: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=6.274]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.9192\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:16<00:00, 12.17it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:27<00:00, 11.02it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1611 | Val F1: 0.1320 | Gap: 0.0291 | EM: 0.0667\n",
      "✓ SAVED! Best F1: 0.1320\n",
      "\n",
      "======================================================================\n",
      "EPOCH 13/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 13: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=5.790]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.8500\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:18<00:00, 10.71it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:33<00:00,  8.98it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1474 | Val F1: 0.1339 | Gap: 0.0134 | EM: 0.0767\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 3\n",
      "  F1: 0.000\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.1339\n",
      "\n",
      "======================================================================\n",
      "EPOCH 14/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 14: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=5.923]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.7848\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:21<00:00,  9.35it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:30<00:00,  9.76it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1659 | Val F1: 0.1283 | Gap: 0.0376 | EM: 0.0567\n",
      "\n",
      "======================================================================\n",
      "EPOCH 15/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 15: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=6.293]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.7178\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:20<00:00,  9.62it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:33<00:00,  9.03it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1857 | Val F1: 0.1491 | Gap: 0.0366 | EM: 0.0700\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 3\n",
      "  F1: 0.000\n",
      "✓ SAVED! Best F1: 0.1491\n",
      "\n",
      "======================================================================\n",
      "EPOCH 16/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 16: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=5.635]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.6613\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:16<00:00, 11.92it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:26<00:00, 11.48it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.2016 | Val F1: 0.1649 | Gap: 0.0367 | EM: 0.0900\n",
      "✓ SAVED! Best F1: 0.1649\n",
      "\n",
      "======================================================================\n",
      "EPOCH 17/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 17: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=5.496]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.6030\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:18<00:00, 10.79it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:28<00:00, 10.50it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1919 | Val F1: 0.1798 | Gap: 0.0120 | EM: 0.0967\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.1798\n",
      "\n",
      "======================================================================\n",
      "EPOCH 18/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 18: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=5.467]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.5458\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:23<00:00,  8.41it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:38<00:00,  7.80it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.2134 | Val F1: 0.1563 | Gap: 0.0571 | EM: 0.0767\n",
      "\n",
      "======================================================================\n",
      "EPOCH 19/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 19: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=5.884]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.4876\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:16<00:00, 12.40it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:26<00:00, 11.27it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.2299 | Val F1: 0.1726 | Gap: 0.0573 | EM: 0.0967\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 20/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 20: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=5.053]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.4293\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:15<00:00, 12.77it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:30<00:00, 10.00it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.2473 | Val F1: 0.1716 | Gap: 0.0757 | EM: 0.0900\n",
      "\n",
      "======================================================================\n",
      "EPOCH 21/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 21: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=5.314]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.3735\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:16<00:00, 12.41it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:24<00:00, 12.25it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.2012 | Val F1: 0.1689 | Gap: 0.0324 | EM: 0.0900\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5.5.5.5.5.5\n",
      "  F1: 0.000\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 22/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 22: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=5.037]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.3178\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:19<00:00, 10.34it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:31<00:00,  9.63it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.2165 | Val F1: 0.1623 | Gap: 0.0542 | EM: 0.0867\n",
      "\n",
      "======================================================================\n",
      "EPOCH 23/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 23: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=5.510]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.2602\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:17<00:00, 11.28it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:27<00:00, 10.99it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.2320 | Val F1: 0.1786 | Gap: 0.0533 | EM: 0.0967\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 24/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 24: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=5.253]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.1878\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:18<00:00, 10.75it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:29<00:00, 10.12it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.2594 | Val F1: 0.1792 | Gap: 0.0802 | EM: 0.1000\n",
      "\n",
      "======================================================================\n",
      "EPOCH 25/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 25: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=5.206]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.1124\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:16<00:00, 12.04it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:24<00:00, 12.23it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.2672 | Val F1: 0.2006 | Gap: 0.0667 | EM: 0.1067\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.2006\n",
      "\n",
      "======================================================================\n",
      "EPOCH 26/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 26: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=5.381]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.0208\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:19<00:00, 10.38it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:30<00:00,  9.99it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.2287 | Val F1: 0.1739 | Gap: 0.0548 | EM: 0.1000\n",
      "\n",
      "======================================================================\n",
      "EPOCH 27/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 27: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=5.241]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 4.9006\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:17<00:00, 11.67it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:25<00:00, 11.87it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.2575 | Val F1: 0.2167 | Gap: 0.0408 | EM: 0.1100\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.\n",
      "  F1: 0.000\n",
      "✓ SAVED! Best F1: 0.2167\n",
      "\n",
      "======================================================================\n",
      "EPOCH 28/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 28: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=4.673]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 4.7673\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:15<00:00, 13.26it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:23<00:00, 12.94it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.2853 | Val F1: 0.2333 | Gap: 0.0520 | EM: 0.1200\n",
      "✓ SAVED! Best F1: 0.2333\n",
      "\n",
      "======================================================================\n",
      "EPOCH 29/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 29: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=4.327]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 4.6243\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:20<00:00,  9.85it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:24<00:00, 12.13it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.2946 | Val F1: 0.2520 | Gap: 0.0427 | EM: 0.1300\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.\n",
      "  F1: 0.000\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.2520\n",
      "\n",
      "======================================================================\n",
      "EPOCH 30/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 30: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=4.910]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 4.4817\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:17<00:00, 11.57it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:25<00:00, 11.90it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.2806 | Val F1: 0.2525 | Gap: 0.0281 | EM: 0.1300\n",
      "✓ SAVED! Best F1: 0.2525\n",
      "\n",
      "======================================================================\n",
      "EPOCH 31/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 31: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=3.965]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 4.3450\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:12<00:00, 16.37it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:17<00:00, 16.79it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.3242 | Val F1: 0.2840 | Gap: 0.0402 | EM: 0.1567\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 5\n",
      "  F1: 0.000\n",
      "✓ SAVED! Best F1: 0.2840\n",
      "\n",
      "======================================================================\n",
      "EPOCH 32/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 32: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=3.950]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 4.2272\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:16<00:00, 12.48it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:16<00:00, 18.17it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.3330 | Val F1: 0.2953 | Gap: 0.0378 | EM: 0.1433\n",
      "✓ SAVED! Best F1: 0.2953\n",
      "\n",
      "======================================================================\n",
      "EPOCH 33/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 33: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=4.818]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 4.1151\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:12<00:00, 15.44it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:18<00:00, 16.42it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.3488 | Val F1: 0.2949 | Gap: 0.0539 | EM: 0.1700\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 34/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 34: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=3.996]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 4.0132\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:13<00:00, 14.95it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:25<00:00, 11.69it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.3811 | Val F1: 0.3205 | Gap: 0.0606 | EM: 0.1700\n",
      "✓ SAVED! Best F1: 0.3205\n",
      "\n",
      "======================================================================\n",
      "EPOCH 35/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 35: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=4.214]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.9195\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:10<00:00, 18.67it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:14<00:00, 20.20it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.3780 | Val F1: 0.3363 | Gap: 0.0417 | EM: 0.1733\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "✓ SAVED! Best F1: 0.3363\n",
      "\n",
      "======================================================================\n",
      "EPOCH 36/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 36: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=3.683]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.8351\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:09<00:00, 20.98it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:16<00:00, 17.94it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4052 | Val F1: 0.3179 | Gap: 0.0873 | EM: 0.1633\n",
      "\n",
      "======================================================================\n",
      "EPOCH 37/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 37: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=3.494]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.7576\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:11<00:00, 17.45it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:18<00:00, 16.15it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4274 | Val F1: 0.3435 | Gap: 0.0839 | EM: 0.1833\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.3435\n",
      "\n",
      "======================================================================\n",
      "EPOCH 38/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 38: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=3.414]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.6868\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:11<00:00, 17.98it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:15<00:00, 18.78it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.3933 | Val F1: 0.3456 | Gap: 0.0478 | EM: 0.1900\n",
      "✓ SAVED! Best F1: 0.3456\n",
      "\n",
      "======================================================================\n",
      "EPOCH 39/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 39: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=3.700]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.6180\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:11<00:00, 16.96it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:17<00:00, 16.88it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4469 | Val F1: 0.3658 | Gap: 0.0811 | EM: 0.2000\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5 gig atons\n",
      "  F1: 0.400\n",
      "✓ SAVED! Best F1: 0.3658\n",
      "\n",
      "======================================================================\n",
      "EPOCH 40/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 40: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=3.717]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.5567\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:11<00:00, 17.75it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:14<00:00, 20.34it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4145 | Val F1: 0.3865 | Gap: 0.0281 | EM: 0.2400\n",
      "✓ SAVED! Best F1: 0.3865\n",
      "\n",
      "======================================================================\n",
      "EPOCH 41/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 41: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=3.455]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.4963\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 23.13it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 22.54it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4694 | Val F1: 0.3956 | Gap: 0.0738 | EM: 0.2333\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5 gig atons\n",
      "  F1: 0.400\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.3956\n",
      "\n",
      "======================================================================\n",
      "EPOCH 42/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 42: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=3.487]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.4418\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:11<00:00, 16.92it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:16<00:00, 17.74it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4614 | Val F1: 0.4194 | Gap: 0.0420 | EM: 0.2233\n",
      "✓ SAVED! Best F1: 0.4194\n",
      "\n",
      "======================================================================\n",
      "EPOCH 43/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 43: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=3.372]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.3911\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:10<00:00, 19.52it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 24.42it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4626 | Val F1: 0.4018 | Gap: 0.0608 | EM: 0.2400\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 44/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 44: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=3.301]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.3434\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:09<00:00, 20.66it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:14<00:00, 20.69it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4555 | Val F1: 0.3734 | Gap: 0.0821 | EM: 0.2133\n",
      "\n",
      "======================================================================\n",
      "EPOCH 45/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 45: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=3.247]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.2958\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:11<00:00, 17.32it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:15<00:00, 19.95it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4681 | Val F1: 0.3960 | Gap: 0.0721 | EM: 0.2233\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 46/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 46: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=3.395]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.2543\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:09<00:00, 20.25it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 21.53it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5038 | Val F1: 0.4254 | Gap: 0.0785 | EM: 0.2433\n",
      "✓ SAVED! Best F1: 0.4254\n",
      "\n",
      "======================================================================\n",
      "EPOCH 47/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 47: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=3.021]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.2174\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:09<00:00, 20.71it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:14<00:00, 20.95it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4816 | Val F1: 0.4362 | Gap: 0.0454 | EM: 0.2633\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5 gig atons\n",
      "  F1: 0.400\n",
      "✓ SAVED! Best F1: 0.4362\n",
      "\n",
      "======================================================================\n",
      "EPOCH 48/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 48: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.765]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.1805\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 24.93it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 22.92it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4737 | Val F1: 0.4380 | Gap: 0.0357 | EM: 0.2500\n",
      "✓ SAVED! Best F1: 0.4380\n",
      "\n",
      "======================================================================\n",
      "EPOCH 49/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 49: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=3.041]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.1463\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 24.60it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 22.02it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5700 | Val F1: 0.4486 | Gap: 0.1214 | EM: 0.2667\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.4486\n",
      "\n",
      "======================================================================\n",
      "EPOCH 50/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 50: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.17it/s, loss=3.002]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.1106\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 26.81it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 24.71it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5111 | Val F1: 0.4320 | Gap: 0.0791 | EM: 0.2400\n",
      "\n",
      "======================================================================\n",
      "EPOCH 51/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 51: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.744]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.0799\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 29.40it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 25.79it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5397 | Val F1: 0.4611 | Gap: 0.0786 | EM: 0.2633\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "✓ SAVED! Best F1: 0.4611\n",
      "\n",
      "======================================================================\n",
      "EPOCH 52/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 52: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.992]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.0496\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 26.13it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 24.10it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5225 | Val F1: 0.4466 | Gap: 0.0759 | EM: 0.2700\n",
      "\n",
      "======================================================================\n",
      "EPOCH 53/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 53: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=3.051]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.0186\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:10<00:00, 18.97it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 22.19it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5613 | Val F1: 0.4737 | Gap: 0.0876 | EM: 0.2767\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.4737\n",
      "\n",
      "======================================================================\n",
      "EPOCH 54/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 54: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.839]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.9939\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:09<00:00, 21.48it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 23.10it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5851 | Val F1: 0.4444 | Gap: 0.1407 | EM: 0.2500\n",
      "\n",
      "======================================================================\n",
      "EPOCH 55/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 55: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.777]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.9671\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 30.03it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 24.48it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5732 | Val F1: 0.4588 | Gap: 0.1145 | EM: 0.2700\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 56/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 56: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.886]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.9486\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 24.89it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:14<00:00, 21.07it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5440 | Val F1: 0.4513 | Gap: 0.0927 | EM: 0.2467\n",
      "\n",
      "======================================================================\n",
      "EPOCH 57/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 57: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.891]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.9235\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 24.08it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:15<00:00, 19.81it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5852 | Val F1: 0.4639 | Gap: 0.1213 | EM: 0.2567\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 58/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 58: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.937]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.9021\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 28.12it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 26.23it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5739 | Val F1: 0.4853 | Gap: 0.0886 | EM: 0.2767\n",
      "✓ SAVED! Best F1: 0.4853\n",
      "\n",
      "======================================================================\n",
      "EPOCH 59/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 59: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.585]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.8824\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 28.99it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 24.64it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5787 | Val F1: 0.4504 | Gap: 0.1283 | EM: 0.2533\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 5\n",
      "  F1: 0.000\n",
      "\n",
      "======================================================================\n",
      "EPOCH 60/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 60: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.19it/s, loss=2.926]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.8618\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 27.22it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 22.42it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6066 | Val F1: 0.5156 | Gap: 0.0911 | EM: 0.3067\n",
      "✓ SAVED! Best F1: 0.5156\n",
      "\n",
      "======================================================================\n",
      "EPOCH 61/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 61: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.17it/s, loss=2.810]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.8411\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:09<00:00, 22.13it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:17<00:00, 17.29it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5946 | Val F1: 0.5003 | Gap: 0.0943 | EM: 0.2800\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 62/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 62: 100%|█████████████████| 1875/1875 [05:04<00:00,  6.16it/s, loss=2.879]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.8224\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:09<00:00, 21.24it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:14<00:00, 20.86it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5916 | Val F1: 0.5347 | Gap: 0.0569 | EM: 0.3100\n",
      "✓ SAVED! Best F1: 0.5347\n",
      "\n",
      "======================================================================\n",
      "EPOCH 63/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 63: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.19it/s, loss=2.473]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.8031\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:10<00:00, 19.38it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:16<00:00, 18.54it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6125 | Val F1: 0.5268 | Gap: 0.0857 | EM: 0.3167\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 64/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 64: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.857]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.7873\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 29.58it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 24.33it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6096 | Val F1: 0.4919 | Gap: 0.1177 | EM: 0.2833\n",
      "\n",
      "======================================================================\n",
      "EPOCH 65/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 65: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.768]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.7723\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 28.02it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 25.33it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6271 | Val F1: 0.4958 | Gap: 0.1313 | EM: 0.2833\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 66/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 66: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.872]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.7561\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 27.94it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 25.67it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6051 | Val F1: 0.5289 | Gap: 0.0762 | EM: 0.3167\n",
      "\n",
      "======================================================================\n",
      "EPOCH 67/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 67: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.676]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.7388\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 24.77it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 24.08it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6067 | Val F1: 0.5167 | Gap: 0.0899 | EM: 0.2967\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 68/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 68: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.778]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.7195\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 25.16it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 21.84it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6362 | Val F1: 0.4841 | Gap: 0.1521 | EM: 0.2800\n",
      "\n",
      "======================================================================\n",
      "EPOCH 69/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 69: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=3.155]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.7044\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 31.86it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:10<00:00, 27.62it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6710 | Val F1: 0.5419 | Gap: 0.1292 | EM: 0.3233\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.5419\n",
      "\n",
      "======================================================================\n",
      "EPOCH 70/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 70: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.778]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.6950\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 28.98it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:10<00:00, 28.12it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6447 | Val F1: 0.5121 | Gap: 0.1326 | EM: 0.2900\n",
      "\n",
      "======================================================================\n",
      "EPOCH 71/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 71: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.702]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.6821\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 27.21it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 22.65it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6368 | Val F1: 0.5081 | Gap: 0.1287 | EM: 0.2933\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 72/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 72: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.752]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.6678\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 29.57it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 23.45it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6545 | Val F1: 0.5332 | Gap: 0.1213 | EM: 0.3200\n",
      "\n",
      "======================================================================\n",
      "EPOCH 73/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 73: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.471]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.6541\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 29.50it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 23.32it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6849 | Val F1: 0.5257 | Gap: 0.1593 | EM: 0.3167\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 74/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 74: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.505]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.6436\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 27.66it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 24.30it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6768 | Val F1: 0.5366 | Gap: 0.1402 | EM: 0.3200\n",
      "\n",
      "======================================================================\n",
      "EPOCH 75/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 75: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.527]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.6281\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 29.19it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 24.97it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6763 | Val F1: 0.5308 | Gap: 0.1455 | EM: 0.3233\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 76/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 76: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.483]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.6165\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 29.84it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 26.62it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6626 | Val F1: 0.5295 | Gap: 0.1331 | EM: 0.3100\n",
      "\n",
      "======================================================================\n",
      "EPOCH 77/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 77: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.355]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.6062\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 28.85it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 23.26it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6617 | Val F1: 0.5128 | Gap: 0.1489 | EM: 0.3000\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 78/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 78: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.500]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5939\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 28.16it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 25.87it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6890 | Val F1: 0.5336 | Gap: 0.1554 | EM: 0.3133\n",
      "\n",
      "======================================================================\n",
      "EPOCH 79/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 79: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.376]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5798\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 31.82it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 26.65it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6971 | Val F1: 0.5329 | Gap: 0.1641 | EM: 0.3200\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 80/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 80: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.528]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5698\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 32.57it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 27.27it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6929 | Val F1: 0.5238 | Gap: 0.1691 | EM: 0.3067\n",
      "\n",
      "======================================================================\n",
      "EPOCH 81/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 81: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.19it/s, loss=2.927]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5574\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 30.14it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 24.02it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7014 | Val F1: 0.5485 | Gap: 0.1528 | EM: 0.3267\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.5485\n",
      "\n",
      "======================================================================\n",
      "EPOCH 82/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 82: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.707]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5496\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 26.79it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 23.33it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6841 | Val F1: 0.5248 | Gap: 0.1593 | EM: 0.2933\n",
      "\n",
      "======================================================================\n",
      "EPOCH 83/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 83: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.528]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5408\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 31.41it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 26.68it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7187 | Val F1: 0.5491 | Gap: 0.1696 | EM: 0.3367\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "✓ SAVED! Best F1: 0.5491\n",
      "\n",
      "======================================================================\n",
      "EPOCH 84/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 84: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.409]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5280\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 30.19it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 24.37it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6854 | Val F1: 0.5538 | Gap: 0.1316 | EM: 0.3167\n",
      "✓ SAVED! Best F1: 0.5538\n",
      "\n",
      "======================================================================\n",
      "EPOCH 85/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 85: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.545]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5208\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 30.51it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 26.20it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6960 | Val F1: 0.5467 | Gap: 0.1493 | EM: 0.3100\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 86/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 86: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.483]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5070\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 27.54it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 24.51it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6918 | Val F1: 0.5396 | Gap: 0.1523 | EM: 0.3200\n",
      "\n",
      "======================================================================\n",
      "EPOCH 87/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 87: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.475]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5009\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 30.88it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 25.98it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6852 | Val F1: 0.5758 | Gap: 0.1094 | EM: 0.3500\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "✓ SAVED! Best F1: 0.5758\n",
      "\n",
      "======================================================================\n",
      "EPOCH 88/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 88: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.19it/s, loss=2.421]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4903\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 32.15it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:10<00:00, 28.85it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6817 | Val F1: 0.5543 | Gap: 0.1274 | EM: 0.3267\n",
      "\n",
      "======================================================================\n",
      "EPOCH 89/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 89: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.441]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4803\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 31.69it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:10<00:00, 27.82it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7166 | Val F1: 0.5642 | Gap: 0.1524 | EM: 0.3433\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 5\n",
      "  F1: 0.000\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 90/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 90: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.19it/s, loss=2.328]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4741\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 26.10it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 24.33it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7406 | Val F1: 0.5541 | Gap: 0.1865 | EM: 0.3333\n",
      "\n",
      "======================================================================\n",
      "EPOCH 91/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 91: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.415]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4668\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 30.15it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 25.42it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7439 | Val F1: 0.5571 | Gap: 0.1868 | EM: 0.3333\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 92/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 92: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.570]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4550\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 23.85it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:14<00:00, 21.38it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7409 | Val F1: 0.5743 | Gap: 0.1666 | EM: 0.3500\n",
      "\n",
      "======================================================================\n",
      "EPOCH 93/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 93: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.345]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4490\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 31.35it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:10<00:00, 27.75it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7225 | Val F1: 0.5967 | Gap: 0.1258 | EM: 0.3700\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5 gigatons\n",
      "  F1: 1.000\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.5967\n",
      "\n",
      "======================================================================\n",
      "EPOCH 94/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 94: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.359]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4430\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 31.22it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:10<00:00, 27.74it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7469 | Val F1: 0.5745 | Gap: 0.1724 | EM: 0.3567\n",
      "\n",
      "======================================================================\n",
      "EPOCH 95/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 95: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.446]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4327\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 30.79it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 25.55it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7459 | Val F1: 0.5998 | Gap: 0.1461 | EM: 0.3767\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "✓ SAVED! Best F1: 0.5998\n",
      "\n",
      "======================================================================\n",
      "EPOCH 96/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 96: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.488]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4255\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 31.88it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 26.30it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7166 | Val F1: 0.5855 | Gap: 0.1311 | EM: 0.3633\n",
      "\n",
      "======================================================================\n",
      "EPOCH 97/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 97: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.564]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4180\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 30.78it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:10<00:00, 28.46it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7237 | Val F1: 0.5783 | Gap: 0.1454 | EM: 0.3433\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 98/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 98: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.540]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4093\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 30.72it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 26.70it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7679 | Val F1: 0.5990 | Gap: 0.1688 | EM: 0.3700\n",
      "\n",
      "======================================================================\n",
      "EPOCH 99/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 99: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.536]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.3996\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 30.22it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 26.17it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7383 | Val F1: 0.5635 | Gap: 0.1748 | EM: 0.3333\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 100/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 100: 100%|████████████████| 1875/1875 [05:03<00:00,  6.19it/s, loss=2.332]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.3983\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 30.61it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:10<00:00, 27.56it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7376 | Val F1: 0.5528 | Gap: 0.1848 | EM: 0.3400\n",
      "\n",
      "======================================================================\n",
      "FINAL RESULTS\n",
      "======================================================================\n",
      "Best Val F1: 60.0%\n",
      "Final Val F1: 55.3%\n",
      "Final EM: 34.0%\n",
      "Train-Val Gap: 0.1848\n",
      "Training for seed 1235\n",
      "Initializing token embeddings with GloVe...\n",
      "✓ Token embeddings initialized with GloVe\n",
      "Total parameters: 21.7M\n",
      "Trainable parameters: 21.7M\n",
      "\n",
      "======================================================================\n",
      "TESTING Q/K HYPOTHESIS - Q/K LR = 20x\n",
      "======================================================================\n",
      "\n",
      "Q/K params: 1.1M\n",
      "Other params: 5.5M\n",
      "\n",
      "\n",
      "======================================================================\n",
      "EPOCH 1/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 1: 100%|██████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=7.872]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 11.6262\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 24.75it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:10<00:00, 29.38it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.0017 | Val F1: 0.0030 | Gap: -0.0013 | EM: 0.0000\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 6\n",
      "  F1: 0.000\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.0030\n",
      "\n",
      "======================================================================\n",
      "EPOCH 2/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 2: 100%|██████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=7.309]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 7.5535\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 32.32it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:10<00:00, 28.33it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.0140 | Val F1: 0.0096 | Gap: 0.0044 | EM: 0.0000\n",
      "✓ SAVED! Best F1: 0.0096\n",
      "\n",
      "======================================================================\n",
      "EPOCH 3/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 3: 100%|██████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=6.724]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 7.1015\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:22<00:00,  8.73it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:34<00:00,  8.77it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.0291 | Val F1: 0.0355 | Gap: -0.0063 | EM: 0.0100\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 2\n",
      "  F1: 0.000\n",
      "✓ SAVED! Best F1: 0.0355\n",
      "\n",
      "======================================================================\n",
      "EPOCH 4/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 4: 100%|██████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=6.417]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 6.8508\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:20<00:00,  9.59it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:35<00:00,  8.55it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.0426 | Val F1: 0.0403 | Gap: 0.0023 | EM: 0.0033\n",
      "✓ SAVED! Best F1: 0.0403\n",
      "\n",
      "======================================================================\n",
      "EPOCH 5/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 5: 100%|██████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=6.452]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 6.6697\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:14<00:00, 13.47it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:32<00:00,  9.34it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.0420 | Val F1: 0.0623 | Gap: -0.0202 | EM: 0.0167\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.\n",
      "  F1: 0.000\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.0623\n",
      "\n",
      "======================================================================\n",
      "EPOCH 6/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 6: 100%|██████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=6.828]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 6.5304\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:15<00:00, 12.92it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:26<00:00, 11.33it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.0473 | Val F1: 0.0688 | Gap: -0.0215 | EM: 0.0233\n",
      "✓ SAVED! Best F1: 0.0688\n",
      "\n",
      "======================================================================\n",
      "EPOCH 7/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 7: 100%|██████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=6.468]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 6.4081\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:14<00:00, 13.40it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:24<00:00, 12.38it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.0620 | Val F1: 0.0670 | Gap: -0.0050 | EM: 0.0233\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 2\n",
      "  F1: 0.000\n",
      "\n",
      "======================================================================\n",
      "EPOCH 8/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 8: 100%|██████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=6.393]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 6.3035\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:15<00:00, 12.68it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:32<00:00,  9.13it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.0838 | Val F1: 0.0520 | Gap: 0.0318 | EM: 0.0167\n",
      "\n",
      "======================================================================\n",
      "EPOCH 9/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 9: 100%|██████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=6.517]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 6.2079\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:13<00:00, 14.87it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:28<00:00, 10.70it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.0992 | Val F1: 0.0634 | Gap: 0.0358 | EM: 0.0267\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 2\n",
      "  F1: 0.000\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 10/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 10: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=6.112]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 6.1223\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:16<00:00, 12.03it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:31<00:00,  9.57it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1030 | Val F1: 0.0878 | Gap: 0.0152 | EM: 0.0400\n",
      "✓ SAVED! Best F1: 0.0878\n",
      "\n",
      "======================================================================\n",
      "EPOCH 11/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 11: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=5.866]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 6.0429\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:19<00:00, 10.03it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:35<00:00,  8.39it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1037 | Val F1: 0.0802 | Gap: 0.0235 | EM: 0.0400\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.\n",
      "  F1: 0.000\n",
      "\n",
      "======================================================================\n",
      "EPOCH 12/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 12: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=5.887]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.9711\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:21<00:00,  9.19it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:39<00:00,  7.62it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1442 | Val F1: 0.0926 | Gap: 0.0516 | EM: 0.0433\n",
      "✓ SAVED! Best F1: 0.0926\n",
      "\n",
      "======================================================================\n",
      "EPOCH 13/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 13: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=6.512]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.9003\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:16<00:00, 12.08it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:32<00:00,  9.25it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1492 | Val F1: 0.0973 | Gap: 0.0519 | EM: 0.0533\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 2\n",
      "  F1: 0.000\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.0973\n",
      "\n",
      "======================================================================\n",
      "EPOCH 14/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 14: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=5.330]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.8386\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:18<00:00, 10.85it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:30<00:00,  9.97it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1494 | Val F1: 0.1140 | Gap: 0.0355 | EM: 0.0567\n",
      "✓ SAVED! Best F1: 0.1140\n",
      "\n",
      "======================================================================\n",
      "EPOCH 15/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 15: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=5.604]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.7725\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:21<00:00,  9.27it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:38<00:00,  7.80it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1597 | Val F1: 0.1239 | Gap: 0.0357 | EM: 0.0533\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 2. 5\n",
      "  F1: 0.000\n",
      "✓ SAVED! Best F1: 0.1239\n",
      "\n",
      "======================================================================\n",
      "EPOCH 16/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 16: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=5.789]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.7188\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:17<00:00, 11.54it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:29<00:00, 10.10it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1562 | Val F1: 0.1041 | Gap: 0.0521 | EM: 0.0533\n",
      "\n",
      "======================================================================\n",
      "EPOCH 17/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 17: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=5.970]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.6571\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:20<00:00,  9.86it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:30<00:00,  9.73it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1420 | Val F1: 0.1156 | Gap: 0.0264 | EM: 0.0533\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 2.5\n",
      "  F1: 0.000\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 18/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 18: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=6.151]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.6020\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:17<00:00, 11.51it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:26<00:00, 11.42it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1475 | Val F1: 0.1229 | Gap: 0.0246 | EM: 0.0533\n",
      "\n",
      "======================================================================\n",
      "EPOCH 19/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 19: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=6.052]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.5473\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:17<00:00, 11.44it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:26<00:00, 11.21it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1573 | Val F1: 0.1229 | Gap: 0.0344 | EM: 0.0600\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 2\n",
      "  F1: 0.000\n",
      "\n",
      "======================================================================\n",
      "EPOCH 20/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 20: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=5.073]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.4937\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:18<00:00, 10.63it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:24<00:00, 12.01it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1625 | Val F1: 0.1546 | Gap: 0.0079 | EM: 0.0667\n",
      "✓ SAVED! Best F1: 0.1546\n",
      "\n",
      "======================================================================\n",
      "EPOCH 21/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 21: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=5.296]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.4369\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:20<00:00,  9.97it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:29<00:00, 10.22it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1703 | Val F1: 0.1504 | Gap: 0.0199 | EM: 0.0767\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 22/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 22: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=5.520]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.3798\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:17<00:00, 11.34it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:24<00:00, 12.37it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1918 | Val F1: 0.1723 | Gap: 0.0195 | EM: 0.0767\n",
      "✓ SAVED! Best F1: 0.1723\n",
      "\n",
      "======================================================================\n",
      "EPOCH 23/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 23: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=5.542]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.3098\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:17<00:00, 11.76it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:28<00:00, 10.57it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.2195 | Val F1: 0.1642 | Gap: 0.0552 | EM: 0.0900\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 2\n",
      "  F1: 0.000\n",
      "\n",
      "======================================================================\n",
      "EPOCH 24/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 24: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=4.798]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.2345\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:18<00:00, 10.60it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:27<00:00, 11.02it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1891 | Val F1: 0.1800 | Gap: 0.0091 | EM: 0.0967\n",
      "✓ SAVED! Best F1: 0.1800\n",
      "\n",
      "======================================================================\n",
      "EPOCH 25/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 25: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=5.111]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.1363\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:18<00:00, 10.79it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:26<00:00, 11.39it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.2381 | Val F1: 0.2149 | Gap: 0.0232 | EM: 0.1233\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.\n",
      "  F1: 0.000\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.2149\n",
      "\n",
      "======================================================================\n",
      "EPOCH 26/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 26: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=5.059]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.0177\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:18<00:00, 10.67it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:24<00:00, 12.16it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.2619 | Val F1: 0.2265 | Gap: 0.0354 | EM: 0.1233\n",
      "✓ SAVED! Best F1: 0.2265\n",
      "\n",
      "======================================================================\n",
      "EPOCH 27/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 27: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=5.196]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 4.8814\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:19<00:00, 10.18it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:26<00:00, 11.14it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.2628 | Val F1: 0.2438 | Gap: 0.0190 | EM: 0.1400\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.\n",
      "  F1: 0.000\n",
      "✓ SAVED! Best F1: 0.2438\n",
      "\n",
      "======================================================================\n",
      "EPOCH 28/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 28: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=4.443]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 4.7260\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:17<00:00, 11.13it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:21<00:00, 14.23it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.2905 | Val F1: 0.2765 | Gap: 0.0140 | EM: 0.1433\n",
      "✓ SAVED! Best F1: 0.2765\n",
      "\n",
      "======================================================================\n",
      "EPOCH 29/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 29: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=4.343]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 4.5774\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:15<00:00, 13.16it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:19<00:00, 15.44it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.2928 | Val F1: 0.2645 | Gap: 0.0283 | EM: 0.1433\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 30/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 30: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=5.012]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 4.4328\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:14<00:00, 13.84it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:21<00:00, 13.85it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.3283 | Val F1: 0.2900 | Gap: 0.0383 | EM: 0.1467\n",
      "✓ SAVED! Best F1: 0.2900\n",
      "\n",
      "======================================================================\n",
      "EPOCH 31/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 31: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=4.308]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 4.3063\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:13<00:00, 14.84it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:19<00:00, 15.40it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.3081 | Val F1: 0.2856 | Gap: 0.0225 | EM: 0.1500\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 32/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 32: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=4.458]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 4.1774\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:12<00:00, 16.25it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:18<00:00, 15.85it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.3497 | Val F1: 0.3432 | Gap: 0.0064 | EM: 0.2033\n",
      "✓ SAVED! Best F1: 0.3432\n",
      "\n",
      "======================================================================\n",
      "EPOCH 33/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 33: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=3.751]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 4.0658\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:10<00:00, 19.01it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:18<00:00, 16.46it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.3599 | Val F1: 0.3255 | Gap: 0.0344 | EM: 0.1800\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 34/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 34: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=4.003]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.9645\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:11<00:00, 18.16it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:17<00:00, 17.09it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.3720 | Val F1: 0.3386 | Gap: 0.0334 | EM: 0.1867\n",
      "\n",
      "======================================================================\n",
      "EPOCH 35/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 35: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=3.798]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.8688\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:14<00:00, 14.04it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:23<00:00, 12.73it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.3389 | Val F1: 0.3387 | Gap: 0.0001 | EM: 0.1833\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5 gig atons\n",
      "  F1: 0.400\n",
      "\n",
      "======================================================================\n",
      "EPOCH 36/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 36: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=3.891]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.7850\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:11<00:00, 17.46it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:17<00:00, 17.02it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4000 | Val F1: 0.3998 | Gap: 0.0002 | EM: 0.2300\n",
      "✓ SAVED! Best F1: 0.3998\n",
      "\n",
      "======================================================================\n",
      "EPOCH 37/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 37: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=3.982]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.7086\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:09<00:00, 20.48it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:17<00:00, 17.02it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.3769 | Val F1: 0.3442 | Gap: 0.0326 | EM: 0.2000\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 5\n",
      "  F1: 0.000\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 38/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 38: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.19it/s, loss=3.256]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.6325\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:09<00:00, 21.64it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:16<00:00, 18.57it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4452 | Val F1: 0.3933 | Gap: 0.0519 | EM: 0.2300\n",
      "\n",
      "======================================================================\n",
      "EPOCH 39/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 39: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=3.416]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.5718\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:11<00:00, 17.26it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:20<00:00, 14.96it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4422 | Val F1: 0.4075 | Gap: 0.0347 | EM: 0.2333\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "✓ SAVED! Best F1: 0.4075\n",
      "\n",
      "======================================================================\n",
      "EPOCH 40/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 40: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=3.380]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.5090\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:09<00:00, 20.07it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:17<00:00, 17.40it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4369 | Val F1: 0.3918 | Gap: 0.0451 | EM: 0.2267\n",
      "\n",
      "======================================================================\n",
      "EPOCH 41/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 41: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=3.808]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.4574\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 23.08it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:16<00:00, 18.49it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4697 | Val F1: 0.4167 | Gap: 0.0531 | EM: 0.2500\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.4167\n",
      "\n",
      "======================================================================\n",
      "EPOCH 42/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 42: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=3.633]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.4026\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:09<00:00, 21.57it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:15<00:00, 19.36it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4660 | Val F1: 0.4123 | Gap: 0.0537 | EM: 0.2500\n",
      "\n",
      "======================================================================\n",
      "EPOCH 43/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 43: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=3.406]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.3520\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:09<00:00, 20.01it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:17<00:00, 16.75it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4978 | Val F1: 0.4407 | Gap: 0.0570 | EM: 0.2400\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "✓ SAVED! Best F1: 0.4407\n",
      "\n",
      "======================================================================\n",
      "EPOCH 44/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 44: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=3.048]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.3070\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 25.98it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:16<00:00, 18.47it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5142 | Val F1: 0.4190 | Gap: 0.0952 | EM: 0.2400\n",
      "\n",
      "======================================================================\n",
      "EPOCH 45/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 45: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=3.270]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.2588\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:09<00:00, 21.65it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:17<00:00, 17.02it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5012 | Val F1: 0.4263 | Gap: 0.0748 | EM: 0.2400\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 46/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 46: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.19it/s, loss=3.103]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.2184\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 24.37it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:16<00:00, 18.28it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5336 | Val F1: 0.4550 | Gap: 0.0786 | EM: 0.2667\n",
      "✓ SAVED! Best F1: 0.4550\n",
      "\n",
      "======================================================================\n",
      "EPOCH 47/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 47: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=3.119]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.1843\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:09<00:00, 20.16it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:16<00:00, 18.73it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5199 | Val F1: 0.4513 | Gap: 0.0686 | EM: 0.2600\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 48/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 48: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=3.105]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.1503\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:09<00:00, 22.21it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:14<00:00, 20.35it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5281 | Val F1: 0.4589 | Gap: 0.0692 | EM: 0.2633\n",
      "✓ SAVED! Best F1: 0.4589\n",
      "\n",
      "======================================================================\n",
      "EPOCH 49/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 49: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.960]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.1162\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 24.24it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 23.51it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5315 | Val F1: 0.4396 | Gap: 0.0918 | EM: 0.2500\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 5\n",
      "  F1: 0.000\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 50/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 50: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.853]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.0856\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 24.32it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:14<00:00, 21.15it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5360 | Val F1: 0.4831 | Gap: 0.0529 | EM: 0.2900\n",
      "✓ SAVED! Best F1: 0.4831\n",
      "\n",
      "======================================================================\n",
      "EPOCH 51/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 51: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.838]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.0557\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 29.26it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 25.00it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5190 | Val F1: 0.4568 | Gap: 0.0622 | EM: 0.2700\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 5\n",
      "  F1: 0.000\n",
      "\n",
      "======================================================================\n",
      "EPOCH 52/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 52: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.898]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.0291\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 25.17it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 24.14it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5235 | Val F1: 0.4495 | Gap: 0.0740 | EM: 0.2667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 53/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 53: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=3.092]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.0088\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 27.94it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 23.48it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5747 | Val F1: 0.4788 | Gap: 0.0959 | EM: 0.3067\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 54/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 54: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.773]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.9838\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:09<00:00, 21.71it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:15<00:00, 19.36it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5845 | Val F1: 0.4760 | Gap: 0.1085 | EM: 0.2767\n",
      "\n",
      "======================================================================\n",
      "EPOCH 55/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 55: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.19it/s, loss=3.200]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.9609\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 24.92it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 22.05it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5378 | Val F1: 0.4423 | Gap: 0.0955 | EM: 0.2567\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 56/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 56: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=3.103]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.9360\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 24.06it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 21.97it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5823 | Val F1: 0.4998 | Gap: 0.0826 | EM: 0.3100\n",
      "✓ SAVED! Best F1: 0.4998\n",
      "\n",
      "======================================================================\n",
      "EPOCH 57/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 57: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.897]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.9139\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 25.57it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 23.03it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5929 | Val F1: 0.4643 | Gap: 0.1286 | EM: 0.2733\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 5\n",
      "  F1: 0.000\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 58/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 58: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.19it/s, loss=3.067]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.8921\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 24.97it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 23.11it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5814 | Val F1: 0.4751 | Gap: 0.1063 | EM: 0.2767\n",
      "\n",
      "======================================================================\n",
      "EPOCH 59/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 59: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.679]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.8734\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:09<00:00, 22.22it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:15<00:00, 18.84it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5855 | Val F1: 0.4746 | Gap: 0.1109 | EM: 0.2800\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 60/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 60: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=3.316]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.8566\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 30.25it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 23.55it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6146 | Val F1: 0.5074 | Gap: 0.1072 | EM: 0.3133\n",
      "✓ SAVED! Best F1: 0.5074\n",
      "\n",
      "======================================================================\n",
      "EPOCH 61/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 61: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.816]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.8393\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 25.28it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 21.75it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6033 | Val F1: 0.4830 | Gap: 0.1203 | EM: 0.2867\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 62/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 62: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.918]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.8171\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 30.10it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 21.51it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6047 | Val F1: 0.4937 | Gap: 0.1110 | EM: 0.2933\n",
      "\n",
      "======================================================================\n",
      "EPOCH 63/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 63: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.909]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.7996\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 25.12it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:14<00:00, 20.08it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6009 | Val F1: 0.4967 | Gap: 0.1042 | EM: 0.2967\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 64/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 64: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.19it/s, loss=2.582]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.7834\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 23.71it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:15<00:00, 19.53it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6385 | Val F1: 0.5207 | Gap: 0.1178 | EM: 0.3033\n",
      "✓ SAVED! Best F1: 0.5207\n",
      "\n",
      "======================================================================\n",
      "EPOCH 65/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 65: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.19it/s, loss=2.837]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.7659\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 27.02it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:14<00:00, 21.28it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6368 | Val F1: 0.4943 | Gap: 0.1425 | EM: 0.2867\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 66/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 66: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.19it/s, loss=2.556]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.7510\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 26.87it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 23.04it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5995 | Val F1: 0.4917 | Gap: 0.1078 | EM: 0.2967\n",
      "\n",
      "======================================================================\n",
      "EPOCH 67/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 67: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.745]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.7336\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 25.25it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:14<00:00, 21.42it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6372 | Val F1: 0.5134 | Gap: 0.1238 | EM: 0.3167\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 68/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 68: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.684]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.7206\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 24.47it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 22.32it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6505 | Val F1: 0.4969 | Gap: 0.1537 | EM: 0.2933\n",
      "\n",
      "======================================================================\n",
      "EPOCH 69/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 69: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.611]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.7078\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 26.14it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 22.08it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6713 | Val F1: 0.5094 | Gap: 0.1619 | EM: 0.3133\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 70/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 70: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.607]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.6909\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 23.73it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:14<00:00, 20.78it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6452 | Val F1: 0.4999 | Gap: 0.1453 | EM: 0.3000\n",
      "\n",
      "======================================================================\n",
      "EPOCH 71/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 71: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.616]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.6777\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 28.28it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 24.28it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6435 | Val F1: 0.5098 | Gap: 0.1337 | EM: 0.3000\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 72/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 72: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.628]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.6656\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 25.29it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:14<00:00, 20.09it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6497 | Val F1: 0.5228 | Gap: 0.1269 | EM: 0.3233\n",
      "✓ SAVED! Best F1: 0.5228\n",
      "\n",
      "======================================================================\n",
      "EPOCH 73/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 73: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.19it/s, loss=2.745]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.6523\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 27.61it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 24.31it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6596 | Val F1: 0.5165 | Gap: 0.1431 | EM: 0.3167\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 74/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 74: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.592]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.6393\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 26.87it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 24.31it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6851 | Val F1: 0.5272 | Gap: 0.1579 | EM: 0.3200\n",
      "✓ SAVED! Best F1: 0.5272\n",
      "\n",
      "======================================================================\n",
      "EPOCH 75/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 75: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.748]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.6254\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 29.95it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 24.57it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6733 | Val F1: 0.5089 | Gap: 0.1644 | EM: 0.3133\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 76/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 76: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.461]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.6143\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 28.84it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:14<00:00, 21.39it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6837 | Val F1: 0.5243 | Gap: 0.1594 | EM: 0.3167\n",
      "\n",
      "======================================================================\n",
      "EPOCH 77/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 77: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.559]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.6034\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 25.39it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:14<00:00, 21.17it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6802 | Val F1: 0.5455 | Gap: 0.1346 | EM: 0.3533\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.5455\n",
      "\n",
      "======================================================================\n",
      "EPOCH 78/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 78: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.17it/s, loss=2.573]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5887\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 25.16it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 22.68it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6316 | Val F1: 0.5293 | Gap: 0.1023 | EM: 0.3333\n",
      "\n",
      "======================================================================\n",
      "EPOCH 79/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 79: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.537]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5824\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 28.20it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 22.54it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7328 | Val F1: 0.5471 | Gap: 0.1857 | EM: 0.3467\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "✓ SAVED! Best F1: 0.5471\n",
      "\n",
      "======================================================================\n",
      "EPOCH 80/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 80: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.342]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5697\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 27.39it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:14<00:00, 21.08it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6908 | Val F1: 0.5490 | Gap: 0.1417 | EM: 0.3500\n",
      "✓ SAVED! Best F1: 0.5490\n",
      "\n",
      "======================================================================\n",
      "EPOCH 81/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 81: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.561]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5586\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 30.76it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 22.95it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7028 | Val F1: 0.5453 | Gap: 0.1575 | EM: 0.3500\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5 gigatons\n",
      "  F1: 1.000\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 82/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 82: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.549]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5518\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 28.85it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 22.58it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7061 | Val F1: 0.5392 | Gap: 0.1669 | EM: 0.3400\n",
      "\n",
      "======================================================================\n",
      "EPOCH 83/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 83: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.512]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5389\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 26.83it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:14<00:00, 20.50it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6940 | Val F1: 0.5398 | Gap: 0.1542 | EM: 0.3400\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 84/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 84: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.512]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5292\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 28.39it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 23.76it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7182 | Val F1: 0.5377 | Gap: 0.1805 | EM: 0.3400\n",
      "\n",
      "======================================================================\n",
      "EPOCH 85/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 85: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.373]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5184\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 25.13it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 22.47it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7084 | Val F1: 0.5460 | Gap: 0.1623 | EM: 0.3400\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5 gigatons\n",
      "  F1: 1.000\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 86/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 86: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=3.021]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5105\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 29.30it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 24.07it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7397 | Val F1: 0.5529 | Gap: 0.1868 | EM: 0.3567\n",
      "✓ SAVED! Best F1: 0.5529\n",
      "\n",
      "======================================================================\n",
      "EPOCH 87/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 87: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.298]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4998\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 25.28it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:14<00:00, 21.33it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7049 | Val F1: 0.5415 | Gap: 0.1634 | EM: 0.3333\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 5\n",
      "  F1: 0.000\n",
      "\n",
      "======================================================================\n",
      "EPOCH 88/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 88: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.638]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4928\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 28.41it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 23.35it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7594 | Val F1: 0.5824 | Gap: 0.1771 | EM: 0.3633\n",
      "✓ SAVED! Best F1: 0.5824\n",
      "\n",
      "======================================================================\n",
      "EPOCH 89/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 89: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.456]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4825\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 27.17it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 23.22it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7287 | Val F1: 0.5290 | Gap: 0.1997 | EM: 0.3267\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 90/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 90: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.407]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4726\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 28.43it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 23.33it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7421 | Val F1: 0.5614 | Gap: 0.1807 | EM: 0.3667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 91/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 91: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.254]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4639\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 27.63it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 23.25it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7372 | Val F1: 0.5646 | Gap: 0.1726 | EM: 0.3567\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 92/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 92: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.632]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4561\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 27.22it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 21.80it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7381 | Val F1: 0.5505 | Gap: 0.1876 | EM: 0.3467\n",
      "\n",
      "======================================================================\n",
      "EPOCH 93/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 93: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.598]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4484\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 28.95it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 22.63it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7370 | Val F1: 0.5821 | Gap: 0.1549 | EM: 0.3667\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 94/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 94: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.714]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4403\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 27.84it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 24.39it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7226 | Val F1: 0.5387 | Gap: 0.1839 | EM: 0.3333\n",
      "\n",
      "======================================================================\n",
      "EPOCH 95/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 95: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.507]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4321\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 30.64it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 24.24it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7852 | Val F1: 0.5541 | Gap: 0.2312 | EM: 0.3433\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 96/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 96: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.577]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4254\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 27.26it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 23.04it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7929 | Val F1: 0.5713 | Gap: 0.2216 | EM: 0.3600\n",
      "\n",
      "======================================================================\n",
      "EPOCH 97/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 97: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.447]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4202\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 28.31it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 22.63it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7628 | Val F1: 0.5940 | Gap: 0.1687 | EM: 0.3700\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.5940\n",
      "\n",
      "======================================================================\n",
      "EPOCH 98/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 98: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.499]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4082\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 28.08it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 25.42it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7502 | Val F1: 0.5760 | Gap: 0.1743 | EM: 0.3633\n",
      "\n",
      "======================================================================\n",
      "EPOCH 99/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 99: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.300]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4031\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 28.28it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 25.62it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7822 | Val F1: 0.5864 | Gap: 0.1958 | EM: 0.3833\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 100/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 100: 100%|████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.402]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.3912\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 29.78it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 24.83it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7707 | Val F1: 0.5611 | Gap: 0.2096 | EM: 0.3667\n",
      "\n",
      "======================================================================\n",
      "FINAL RESULTS\n",
      "======================================================================\n",
      "Best Val F1: 59.4%\n",
      "Final Val F1: 56.1%\n",
      "Final EM: 36.7%\n",
      "Train-Val Gap: 0.2096\n",
      "Training for seed 1236\n",
      "Initializing token embeddings with GloVe...\n",
      "✓ Token embeddings initialized with GloVe\n",
      "Total parameters: 21.7M\n",
      "Trainable parameters: 21.7M\n",
      "\n",
      "======================================================================\n",
      "TESTING Q/K HYPOTHESIS - Q/K LR = 20x\n",
      "======================================================================\n",
      "\n",
      "Q/K params: 1.1M\n",
      "Other params: 5.5M\n",
      "\n",
      "\n",
      "======================================================================\n",
      "EPOCH 1/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 1: 100%|██████████████████| 1875/1875 [05:03<00:00,  6.19it/s, loss=8.551]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 11.6894\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:40<00:00,  4.89it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [01:01<00:00,  4.91it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.0000 | Val F1: 0.0000 | Gap: 0.0000 | EM: 0.0000\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 6\n",
      "  F1: 0.000\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 2/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 2: 100%|██████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=7.365]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 7.5698\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:16<00:00, 12.00it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:28<00:00, 10.55it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.0097 | Val F1: 0.0115 | Gap: -0.0017 | EM: 0.0033\n",
      "✓ SAVED! Best F1: 0.0115\n",
      "\n",
      "======================================================================\n",
      "EPOCH 3/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 3: 100%|██████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=7.049]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 7.1028\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:16<00:00, 12.36it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:30<00:00,  9.99it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.0301 | Val F1: 0.0129 | Gap: 0.0173 | EM: 0.0000\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 2\n",
      "  F1: 0.000\n",
      "✓ SAVED! Best F1: 0.0129\n",
      "\n",
      "======================================================================\n",
      "EPOCH 4/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 4: 100%|██████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=6.469]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 6.8477\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:22<00:00,  8.75it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:31<00:00,  9.46it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.0515 | Val F1: 0.0362 | Gap: 0.0152 | EM: 0.0033\n",
      "✓ SAVED! Best F1: 0.0362\n",
      "\n",
      "======================================================================\n",
      "EPOCH 5/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 5: 100%|██████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=5.925]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 6.6661\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:21<00:00,  9.23it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:28<00:00, 10.54it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.0586 | Val F1: 0.0416 | Gap: 0.0170 | EM: 0.0067\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 2\n",
      "  F1: 0.000\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.0416\n",
      "\n",
      "======================================================================\n",
      "EPOCH 6/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 6: 100%|██████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=6.263]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 6.5200\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:16<00:00, 12.12it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:26<00:00, 11.13it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.0850 | Val F1: 0.0607 | Gap: 0.0243 | EM: 0.0100\n",
      "✓ SAVED! Best F1: 0.0607\n",
      "\n",
      "======================================================================\n",
      "EPOCH 7/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 7: 100%|██████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=6.863]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 6.3941\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:16<00:00, 12.13it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:26<00:00, 11.39it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.0770 | Val F1: 0.0899 | Gap: -0.0129 | EM: 0.0333\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 2\n",
      "  F1: 0.000\n",
      "✓ SAVED! Best F1: 0.0899\n",
      "\n",
      "======================================================================\n",
      "EPOCH 8/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 8: 100%|██████████████████| 1875/1875 [05:03<00:00,  6.19it/s, loss=5.704]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 6.2863\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:16<00:00, 11.93it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:29<00:00, 10.10it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.0974 | Val F1: 0.0833 | Gap: 0.0141 | EM: 0.0233\n",
      "\n",
      "======================================================================\n",
      "EPOCH 9/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 9: 100%|██████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=6.076]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 6.1926\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:16<00:00, 12.42it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:28<00:00, 10.36it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.0875 | Val F1: 0.1042 | Gap: -0.0167 | EM: 0.0467\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 2. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3.\n",
      "  F1: 0.000\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.1042\n",
      "\n",
      "======================================================================\n",
      "EPOCH 10/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 10: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=5.868]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 6.1033\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:19<00:00, 10.19it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:26<00:00, 11.33it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.0949 | Val F1: 0.1275 | Gap: -0.0326 | EM: 0.0567\n",
      "✓ SAVED! Best F1: 0.1275\n",
      "\n",
      "======================================================================\n",
      "EPOCH 11/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 11: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=6.054]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 6.0263\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:13<00:00, 14.42it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:22<00:00, 13.21it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1130 | Val F1: 0.1254 | Gap: -0.0124 | EM: 0.0567\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 3\n",
      "  F1: 0.000\n",
      "\n",
      "======================================================================\n",
      "EPOCH 12/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 12: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=5.559]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.9486\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:15<00:00, 12.83it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:26<00:00, 11.37it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1267 | Val F1: 0.1266 | Gap: 0.0001 | EM: 0.0600\n",
      "\n",
      "======================================================================\n",
      "EPOCH 13/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 13: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=5.428]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.8772\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:14<00:00, 13.53it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:25<00:00, 11.92it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1588 | Val F1: 0.1273 | Gap: 0.0314 | EM: 0.0667\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 2\n",
      "  F1: 0.000\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 14/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 14: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=5.723]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.8062\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:14<00:00, 13.65it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:28<00:00, 10.68it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1759 | Val F1: 0.1152 | Gap: 0.0607 | EM: 0.0467\n",
      "\n",
      "======================================================================\n",
      "EPOCH 15/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 15: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=5.912]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.7370\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:19<00:00, 10.03it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:29<00:00, 10.23it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1552 | Val F1: 0.1411 | Gap: 0.0140 | EM: 0.0733\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 2\n",
      "  F1: 0.000\n",
      "✓ SAVED! Best F1: 0.1411\n",
      "\n",
      "======================================================================\n",
      "EPOCH 16/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 16: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=5.903]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.6704\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:17<00:00, 11.54it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:27<00:00, 10.81it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1799 | Val F1: 0.1630 | Gap: 0.0168 | EM: 0.0867\n",
      "✓ SAVED! Best F1: 0.1630\n",
      "\n",
      "======================================================================\n",
      "EPOCH 17/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 17: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=5.618]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.5975\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:20<00:00,  9.98it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:26<00:00, 11.23it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1688 | Val F1: 0.1554 | Gap: 0.0133 | EM: 0.0767\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 2\n",
      "  F1: 0.000\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 18/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 18: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=5.951]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.5207\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:18<00:00, 10.73it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:26<00:00, 11.39it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1727 | Val F1: 0.1748 | Gap: -0.0022 | EM: 0.0933\n",
      "✓ SAVED! Best F1: 0.1748\n",
      "\n",
      "======================================================================\n",
      "EPOCH 19/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 19: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=5.586]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.4228\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:21<00:00,  9.42it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:30<00:00,  9.93it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1828 | Val F1: 0.1659 | Gap: 0.0170 | EM: 0.0967\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.\n",
      "  F1: 0.000\n",
      "\n",
      "======================================================================\n",
      "EPOCH 20/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 20: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=5.220]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.2974\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:21<00:00,  9.17it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:28<00:00, 10.51it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.2370 | Val F1: 0.1906 | Gap: 0.0464 | EM: 0.1033\n",
      "✓ SAVED! Best F1: 0.1906\n",
      "\n",
      "======================================================================\n",
      "EPOCH 21/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 21: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=5.162]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.1521\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:15<00:00, 12.56it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:24<00:00, 12.42it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.2217 | Val F1: 0.1956 | Gap: 0.0261 | EM: 0.1067\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 3.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.\n",
      "  F1: 0.000\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.1956\n",
      "\n",
      "======================================================================\n",
      "EPOCH 22/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 22: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=4.748]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 4.9901\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:17<00:00, 11.68it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:25<00:00, 11.85it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.2468 | Val F1: 0.2177 | Gap: 0.0291 | EM: 0.1233\n",
      "✓ SAVED! Best F1: 0.2177\n",
      "\n",
      "======================================================================\n",
      "EPOCH 23/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 23: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=4.820]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 4.8205\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:12<00:00, 16.65it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:18<00:00, 15.83it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.2856 | Val F1: 0.2579 | Gap: 0.0277 | EM: 0.1367\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.\n",
      "  F1: 0.000\n",
      "✓ SAVED! Best F1: 0.2579\n",
      "\n",
      "======================================================================\n",
      "EPOCH 24/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 24: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=4.780]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 4.6572\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:13<00:00, 14.95it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:23<00:00, 12.92it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.2978 | Val F1: 0.2635 | Gap: 0.0343 | EM: 0.1367\n",
      "✓ SAVED! Best F1: 0.2635\n",
      "\n",
      "======================================================================\n",
      "EPOCH 25/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 25: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=4.668]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 4.5079\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:15<00:00, 13.02it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:23<00:00, 12.92it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.2974 | Val F1: 0.2621 | Gap: 0.0354 | EM: 0.1467\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 26/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 26: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=4.404]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 4.3609\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:15<00:00, 12.85it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:24<00:00, 12.19it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.2574 | Val F1: 0.2708 | Gap: -0.0134 | EM: 0.1533\n",
      "✓ SAVED! Best F1: 0.2708\n",
      "\n",
      "======================================================================\n",
      "EPOCH 27/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 27: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=3.869]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 4.2341\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:14<00:00, 13.54it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:20<00:00, 14.70it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.3341 | Val F1: 0.2938 | Gap: 0.0402 | EM: 0.1700\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "✓ SAVED! Best F1: 0.2938\n",
      "\n",
      "======================================================================\n",
      "EPOCH 28/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 28: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=3.689]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 4.1126\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:12<00:00, 15.47it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:18<00:00, 15.86it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.3282 | Val F1: 0.3230 | Gap: 0.0052 | EM: 0.1867\n",
      "✓ SAVED! Best F1: 0.3230\n",
      "\n",
      "======================================================================\n",
      "EPOCH 29/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 29: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=4.391]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 4.0041\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:14<00:00, 13.34it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:20<00:00, 14.50it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.3816 | Val F1: 0.3156 | Gap: 0.0660 | EM: 0.1700\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 30/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 30: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=3.458]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.9087\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:14<00:00, 14.13it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:21<00:00, 13.86it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.3461 | Val F1: 0.3422 | Gap: 0.0039 | EM: 0.1967\n",
      "✓ SAVED! Best F1: 0.3422\n",
      "\n",
      "======================================================================\n",
      "EPOCH 31/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 31: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=4.104]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.8187\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:14<00:00, 14.06it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:18<00:00, 16.52it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.3492 | Val F1: 0.3280 | Gap: 0.0212 | EM: 0.1833\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 32/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 32: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=3.589]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.7409\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:12<00:00, 16.11it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:20<00:00, 14.76it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.3808 | Val F1: 0.3475 | Gap: 0.0333 | EM: 0.2000\n",
      "✓ SAVED! Best F1: 0.3475\n",
      "\n",
      "======================================================================\n",
      "EPOCH 33/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 33: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=4.135]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.6629\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:12<00:00, 16.40it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:20<00:00, 14.29it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.3791 | Val F1: 0.3683 | Gap: 0.0109 | EM: 0.2033\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 5 gig atons\n",
      "  F1: 0.000\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.3683\n",
      "\n",
      "======================================================================\n",
      "EPOCH 34/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 34: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=3.284]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.5924\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:09<00:00, 21.65it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:16<00:00, 18.74it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4040 | Val F1: 0.3560 | Gap: 0.0480 | EM: 0.2200\n",
      "\n",
      "======================================================================\n",
      "EPOCH 35/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 35: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=3.485]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.5322\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:10<00:00, 19.36it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:17<00:00, 17.47it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4448 | Val F1: 0.3967 | Gap: 0.0481 | EM: 0.2467\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "✓ SAVED! Best F1: 0.3967\n",
      "\n",
      "======================================================================\n",
      "EPOCH 36/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 36: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=3.296]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.4664\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:11<00:00, 17.95it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:17<00:00, 17.14it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4262 | Val F1: 0.3774 | Gap: 0.0488 | EM: 0.2233\n",
      "\n",
      "======================================================================\n",
      "EPOCH 37/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 37: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=3.233]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.4134\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:09<00:00, 20.37it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:15<00:00, 19.06it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4701 | Val F1: 0.4074 | Gap: 0.0627 | EM: 0.2300\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.4074\n",
      "\n",
      "======================================================================\n",
      "EPOCH 38/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 38: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=3.484]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.3615\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:09<00:00, 20.19it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:14<00:00, 20.26it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4326 | Val F1: 0.3685 | Gap: 0.0641 | EM: 0.2233\n",
      "\n",
      "======================================================================\n",
      "EPOCH 39/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 39: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=3.935]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.3158\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:09<00:00, 20.26it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:17<00:00, 17.30it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4769 | Val F1: 0.4150 | Gap: 0.0619 | EM: 0.2467\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "✓ SAVED! Best F1: 0.4150\n",
      "\n",
      "======================================================================\n",
      "EPOCH 40/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 40: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=3.044]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.2735\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:09<00:00, 20.56it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:15<00:00, 18.91it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4705 | Val F1: 0.4329 | Gap: 0.0376 | EM: 0.2633\n",
      "✓ SAVED! Best F1: 0.4329\n",
      "\n",
      "======================================================================\n",
      "EPOCH 41/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 41: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=3.374]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.2354\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 23.60it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 22.66it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4806 | Val F1: 0.4248 | Gap: 0.0558 | EM: 0.2533\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 5 gig atons\n",
      "  F1: 0.000\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 42/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 42: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=3.271]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.1971\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:10<00:00, 19.94it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:15<00:00, 18.97it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5109 | Val F1: 0.4284 | Gap: 0.0826 | EM: 0.2467\n",
      "\n",
      "======================================================================\n",
      "EPOCH 43/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 43: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.877]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.1619\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:10<00:00, 19.25it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:15<00:00, 19.41it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4911 | Val F1: 0.4388 | Gap: 0.0523 | EM: 0.2533\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5 gig atons\n",
      "  F1: 0.400\n",
      "✓ SAVED! Best F1: 0.4388\n",
      "\n",
      "======================================================================\n",
      "EPOCH 44/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 44: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=3.198]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.1311\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 25.24it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:14<00:00, 20.28it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4679 | Val F1: 0.4037 | Gap: 0.0642 | EM: 0.2400\n",
      "\n",
      "======================================================================\n",
      "EPOCH 45/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 45: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.964]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.1014\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:09<00:00, 20.12it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:14<00:00, 21.04it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5158 | Val F1: 0.4672 | Gap: 0.0486 | EM: 0.2800\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5 gig atons\n",
      "  F1: 0.400\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.4672\n",
      "\n",
      "======================================================================\n",
      "EPOCH 46/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 46: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=3.321]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.0695\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 24.42it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 23.41it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5652 | Val F1: 0.4810 | Gap: 0.0842 | EM: 0.3033\n",
      "✓ SAVED! Best F1: 0.4810\n",
      "\n",
      "======================================================================\n",
      "EPOCH 47/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 47: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.970]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.0451\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 22.60it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:14<00:00, 21.13it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5280 | Val F1: 0.4772 | Gap: 0.0508 | EM: 0.3033\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 48/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 48: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=3.015]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.0226\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 23.51it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 24.58it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5734 | Val F1: 0.4812 | Gap: 0.0923 | EM: 0.3233\n",
      "✓ SAVED! Best F1: 0.4812\n",
      "\n",
      "======================================================================\n",
      "EPOCH 49/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 49: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=3.262]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.9997\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 23.77it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:14<00:00, 21.36it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5621 | Val F1: 0.4693 | Gap: 0.0928 | EM: 0.3000\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 50/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 50: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.891]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.9753\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 23.97it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 21.96it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5734 | Val F1: 0.4900 | Gap: 0.0833 | EM: 0.3200\n",
      "✓ SAVED! Best F1: 0.4900\n",
      "\n",
      "======================================================================\n",
      "EPOCH 51/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 51: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=3.034]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.9511\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 23.08it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 21.77it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5890 | Val F1: 0.4894 | Gap: 0.0996 | EM: 0.3067\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 52/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 52: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=3.212]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.9311\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 22.47it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:14<00:00, 20.54it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5898 | Val F1: 0.4748 | Gap: 0.1150 | EM: 0.2800\n",
      "\n",
      "======================================================================\n",
      "EPOCH 53/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 53: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.875]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.9060\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 23.44it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:14<00:00, 21.21it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6050 | Val F1: 0.5132 | Gap: 0.0918 | EM: 0.3233\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.5132\n",
      "\n",
      "======================================================================\n",
      "EPOCH 54/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 54: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.827]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.8908\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 25.03it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 25.42it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5941 | Val F1: 0.5025 | Gap: 0.0916 | EM: 0.3200\n",
      "\n",
      "======================================================================\n",
      "EPOCH 55/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 55: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.806]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.8710\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 23.55it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:15<00:00, 19.04it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5889 | Val F1: 0.4693 | Gap: 0.1196 | EM: 0.2733\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 56/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 56: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.893]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.8531\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 26.02it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 21.95it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6234 | Val F1: 0.5036 | Gap: 0.1198 | EM: 0.3067\n",
      "\n",
      "======================================================================\n",
      "EPOCH 57/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 57: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.549]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.8368\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 23.57it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:14<00:00, 20.05it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6112 | Val F1: 0.4839 | Gap: 0.1274 | EM: 0.2767\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 58/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 58: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.805]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.8189\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 24.78it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:16<00:00, 18.73it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6054 | Val F1: 0.4915 | Gap: 0.1139 | EM: 0.2967\n",
      "\n",
      "======================================================================\n",
      "EPOCH 59/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 59: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.797]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.7996\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 26.22it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 24.11it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6650 | Val F1: 0.5248 | Gap: 0.1402 | EM: 0.3400\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "✓ SAVED! Best F1: 0.5248\n",
      "\n",
      "======================================================================\n",
      "EPOCH 60/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 60: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=3.023]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.7835\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 25.05it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 22.18it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6253 | Val F1: 0.5107 | Gap: 0.1146 | EM: 0.2967\n",
      "\n",
      "======================================================================\n",
      "EPOCH 61/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 61: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.771]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.7674\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 29.40it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 23.29it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6656 | Val F1: 0.5028 | Gap: 0.1628 | EM: 0.3200\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5 gigatons\n",
      "  F1: 1.000\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 62/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 62: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.804]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.7541\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 24.86it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 21.86it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6303 | Val F1: 0.5089 | Gap: 0.1214 | EM: 0.3200\n",
      "\n",
      "======================================================================\n",
      "EPOCH 63/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 63: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.930]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.7368\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 26.38it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:14<00:00, 21.31it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6450 | Val F1: 0.5147 | Gap: 0.1303 | EM: 0.3133\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 64/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 64: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.765]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.7258\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 29.32it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 25.39it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6589 | Val F1: 0.5043 | Gap: 0.1546 | EM: 0.3167\n",
      "\n",
      "======================================================================\n",
      "EPOCH 65/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 65: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.892]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.7145\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 25.40it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 21.64it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6392 | Val F1: 0.5436 | Gap: 0.0955 | EM: 0.3367\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.5436\n",
      "\n",
      "======================================================================\n",
      "EPOCH 66/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 66: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.940]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.7009\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 31.12it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 22.95it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6410 | Val F1: 0.5220 | Gap: 0.1189 | EM: 0.3200\n",
      "\n",
      "======================================================================\n",
      "EPOCH 67/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 67: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.855]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.6832\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 27.59it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 23.73it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6751 | Val F1: 0.5365 | Gap: 0.1385 | EM: 0.3167\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 68/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 68: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.938]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.6727\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 27.41it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 24.56it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6592 | Val F1: 0.5637 | Gap: 0.0955 | EM: 0.3567\n",
      "✓ SAVED! Best F1: 0.5637\n",
      "\n",
      "======================================================================\n",
      "EPOCH 69/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 69: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.450]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.6605\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 26.41it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 23.17it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6835 | Val F1: 0.5696 | Gap: 0.1139 | EM: 0.3500\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.5696\n",
      "\n",
      "======================================================================\n",
      "EPOCH 70/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 70: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.688]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.6439\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 29.87it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 25.59it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6747 | Val F1: 0.5478 | Gap: 0.1269 | EM: 0.3400\n",
      "\n",
      "======================================================================\n",
      "EPOCH 71/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 71: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.802]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.6351\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 24.44it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 21.69it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6819 | Val F1: 0.5101 | Gap: 0.1718 | EM: 0.3033\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 72/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 72: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.795]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.6217\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 28.01it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:10<00:00, 28.46it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6925 | Val F1: 0.5478 | Gap: 0.1447 | EM: 0.3433\n",
      "\n",
      "======================================================================\n",
      "EPOCH 73/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 73: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.480]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.6091\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 26.62it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 26.04it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7139 | Val F1: 0.5476 | Gap: 0.1663 | EM: 0.3400\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 74/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 74: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=3.083]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.6001\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 22.77it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 21.56it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6695 | Val F1: 0.5317 | Gap: 0.1377 | EM: 0.3367\n",
      "\n",
      "======================================================================\n",
      "EPOCH 75/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 75: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.19it/s, loss=2.631]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5889\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 26.80it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 22.24it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6935 | Val F1: 0.5310 | Gap: 0.1625 | EM: 0.3200\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 76/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 76: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.484]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5800\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 29.11it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 26.09it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7336 | Val F1: 0.5907 | Gap: 0.1429 | EM: 0.3800\n",
      "✓ SAVED! Best F1: 0.5907\n",
      "\n",
      "======================================================================\n",
      "EPOCH 77/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 77: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.415]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5668\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 28.45it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 25.67it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6943 | Val F1: 0.5526 | Gap: 0.1416 | EM: 0.3467\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 78/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 78: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.474]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5558\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 27.35it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 25.01it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7145 | Val F1: 0.5634 | Gap: 0.1511 | EM: 0.3533\n",
      "\n",
      "======================================================================\n",
      "EPOCH 79/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 79: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.518]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5447\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 28.76it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 24.06it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7206 | Val F1: 0.5724 | Gap: 0.1482 | EM: 0.3500\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 80/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 80: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.804]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5396\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 26.14it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 24.95it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6960 | Val F1: 0.5567 | Gap: 0.1393 | EM: 0.3533\n",
      "\n",
      "======================================================================\n",
      "EPOCH 81/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 81: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.514]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5267\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 25.55it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 25.03it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7391 | Val F1: 0.5952 | Gap: 0.1439 | EM: 0.3700\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.5952\n",
      "\n",
      "======================================================================\n",
      "EPOCH 82/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 82: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.320]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5165\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 27.49it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 26.69it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7171 | Val F1: 0.5473 | Gap: 0.1698 | EM: 0.3433\n",
      "\n",
      "======================================================================\n",
      "EPOCH 83/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 83: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.412]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5064\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 26.24it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 23.13it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7180 | Val F1: 0.5644 | Gap: 0.1536 | EM: 0.3500\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 84/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 84: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.463]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4972\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 28.47it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 23.93it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7302 | Val F1: 0.5862 | Gap: 0.1439 | EM: 0.3667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 85/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 85: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.263]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4899\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 29.67it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 26.10it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7567 | Val F1: 0.5954 | Gap: 0.1614 | EM: 0.3933\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.5954\n",
      "\n",
      "======================================================================\n",
      "EPOCH 86/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 86: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.401]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4787\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 27.30it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 25.03it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6922 | Val F1: 0.5588 | Gap: 0.1334 | EM: 0.3467\n",
      "\n",
      "======================================================================\n",
      "EPOCH 87/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 87: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.19it/s, loss=2.315]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4704\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 26.49it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 24.20it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7333 | Val F1: 0.5829 | Gap: 0.1504 | EM: 0.3533\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 88/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 88: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.476]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4640\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 27.34it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 22.33it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7059 | Val F1: 0.5627 | Gap: 0.1433 | EM: 0.3533\n",
      "\n",
      "======================================================================\n",
      "EPOCH 89/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 89: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.510]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4527\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 29.69it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:10<00:00, 27.27it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7681 | Val F1: 0.6052 | Gap: 0.1628 | EM: 0.3900\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.6052\n",
      "\n",
      "======================================================================\n",
      "EPOCH 90/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 90: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.337]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4444\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 26.47it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 25.97it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7489 | Val F1: 0.5812 | Gap: 0.1677 | EM: 0.3667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 91/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 91: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.555]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4389\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 27.01it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:14<00:00, 21.41it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7382 | Val F1: 0.5796 | Gap: 0.1587 | EM: 0.3733\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 92/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 92: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.440]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4286\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 25.53it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 21.68it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7476 | Val F1: 0.6042 | Gap: 0.1434 | EM: 0.3667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 93/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 93: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.19it/s, loss=2.781]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4227\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 27.38it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 25.63it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7062 | Val F1: 0.5874 | Gap: 0.1188 | EM: 0.3733\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 94/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 94: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.521]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4149\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 29.79it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 25.98it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7583 | Val F1: 0.5890 | Gap: 0.1692 | EM: 0.3733\n",
      "\n",
      "======================================================================\n",
      "EPOCH 95/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 95: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.530]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4060\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 28.52it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 26.28it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7988 | Val F1: 0.6037 | Gap: 0.1951 | EM: 0.3800\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 96/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 96: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.556]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.3972\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 29.77it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 26.35it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7506 | Val F1: 0.5948 | Gap: 0.1558 | EM: 0.3767\n",
      "\n",
      "======================================================================\n",
      "EPOCH 97/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 97: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.273]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.3879\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 31.02it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 26.80it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7712 | Val F1: 0.5859 | Gap: 0.1853 | EM: 0.3733\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 98/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 98: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.254]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.3854\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 29.75it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:10<00:00, 27.50it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7631 | Val F1: 0.6001 | Gap: 0.1630 | EM: 0.3900\n",
      "\n",
      "======================================================================\n",
      "EPOCH 99/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 99: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.314]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.3765\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 26.69it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 24.16it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7755 | Val F1: 0.5826 | Gap: 0.1929 | EM: 0.3633\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 100/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 100: 100%|████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.381]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.3693\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 28.54it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 23.89it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7790 | Val F1: 0.5898 | Gap: 0.1891 | EM: 0.3700\n",
      "\n",
      "======================================================================\n",
      "FINAL RESULTS\n",
      "======================================================================\n",
      "Best Val F1: 60.5%\n",
      "Final Val F1: 59.0%\n",
      "Final EM: 37.0%\n",
      "Train-Val Gap: 0.1891\n",
      "Training for seed 1237\n",
      "Initializing token embeddings with GloVe...\n",
      "✓ Token embeddings initialized with GloVe\n",
      "Total parameters: 21.7M\n",
      "Trainable parameters: 21.7M\n",
      "\n",
      "======================================================================\n",
      "TESTING Q/K HYPOTHESIS - Q/K LR = 20x\n",
      "======================================================================\n",
      "\n",
      "Q/K params: 1.1M\n",
      "Other params: 5.5M\n",
      "\n",
      "\n",
      "======================================================================\n",
      "EPOCH 1/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 1: 100%|██████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=8.160]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 11.9676\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 29.96it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:09<00:00, 31.02it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.0020 | Val F1: 0.0039 | Gap: -0.0019 | EM: 0.0000\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: \n",
      "  F1: 0.000\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.0039\n",
      "\n",
      "======================================================================\n",
      "EPOCH 2/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 2: 100%|██████████████████| 1875/1875 [05:03<00:00,  6.19it/s, loss=7.658]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 7.5474\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:35<00:00,  5.69it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:57<00:00,  5.21it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.0167 | Val F1: 0.0100 | Gap: 0.0067 | EM: 0.0033\n",
      "✓ SAVED! Best F1: 0.0100\n",
      "\n",
      "======================================================================\n",
      "EPOCH 3/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 3: 100%|██████████████████| 1875/1875 [05:03<00:00,  6.19it/s, loss=7.089]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 7.0832\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:23<00:00,  8.39it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:40<00:00,  7.35it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.0309 | Val F1: 0.0269 | Gap: 0.0041 | EM: 0.0067\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 2, 2015\n",
      "  F1: 0.000\n",
      "✓ SAVED! Best F1: 0.0269\n",
      "\n",
      "======================================================================\n",
      "EPOCH 4/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 4: 100%|██████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=6.698]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 6.8225\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:17<00:00, 11.28it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:34<00:00,  8.69it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.0629 | Val F1: 0.0614 | Gap: 0.0016 | EM: 0.0200\n",
      "✓ SAVED! Best F1: 0.0614\n",
      "\n",
      "======================================================================\n",
      "EPOCH 5/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 5: 100%|██████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=6.614]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 6.6348\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:21<00:00,  9.21it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:32<00:00,  9.19it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.0750 | Val F1: 0.0625 | Gap: 0.0125 | EM: 0.0167\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.\n",
      "  F1: 0.000\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.0625\n",
      "\n",
      "======================================================================\n",
      "EPOCH 6/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 6: 100%|██████████████████| 1875/1875 [05:03<00:00,  6.19it/s, loss=6.540]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 6.4873\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:20<00:00,  9.58it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:36<00:00,  8.19it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.0778 | Val F1: 0.0763 | Gap: 0.0015 | EM: 0.0300\n",
      "✓ SAVED! Best F1: 0.0763\n",
      "\n",
      "======================================================================\n",
      "EPOCH 7/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 7: 100%|██████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=6.175]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 6.3595\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:17<00:00, 11.72it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:26<00:00, 11.23it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.0818 | Val F1: 0.0834 | Gap: -0.0016 | EM: 0.0367\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 2\n",
      "  F1: 0.000\n",
      "✓ SAVED! Best F1: 0.0834\n",
      "\n",
      "======================================================================\n",
      "EPOCH 8/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 8: 100%|██████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=6.492]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 6.2530\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:18<00:00, 10.97it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:33<00:00,  9.06it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1045 | Val F1: 0.1013 | Gap: 0.0031 | EM: 0.0433\n",
      "✓ SAVED! Best F1: 0.1013\n",
      "\n",
      "======================================================================\n",
      "EPOCH 9/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 9: 100%|██████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=6.544]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 6.1510\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:16<00:00, 11.79it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:26<00:00, 11.25it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1353 | Val F1: 0.1064 | Gap: 0.0289 | EM: 0.0500\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 2. 2. 2\n",
      "  F1: 0.000\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.1064\n",
      "\n",
      "======================================================================\n",
      "EPOCH 10/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 10: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.19it/s, loss=6.369]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 6.0647\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:14<00:00, 13.34it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:25<00:00, 11.65it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1517 | Val F1: 0.1111 | Gap: 0.0407 | EM: 0.0467\n",
      "✓ SAVED! Best F1: 0.1111\n",
      "\n",
      "======================================================================\n",
      "EPOCH 11/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 11: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.19it/s, loss=5.706]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.9858\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:16<00:00, 12.23it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:29<00:00, 10.06it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1682 | Val F1: 0.1283 | Gap: 0.0399 | EM: 0.0633\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.\n",
      "  F1: 0.000\n",
      "✓ SAVED! Best F1: 0.1283\n",
      "\n",
      "======================================================================\n",
      "EPOCH 12/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 12: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=5.692]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.9069\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:16<00:00, 11.91it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:31<00:00,  9.66it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1484 | Val F1: 0.1184 | Gap: 0.0301 | EM: 0.0633\n",
      "\n",
      "======================================================================\n",
      "EPOCH 13/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 13: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.19it/s, loss=5.724]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.8355\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:20<00:00,  9.88it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:29<00:00, 10.27it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1507 | Val F1: 0.1257 | Gap: 0.0250 | EM: 0.0600\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 2\n",
      "  F1: 0.000\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 14/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 14: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=5.884]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.7623\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:19<00:00, 10.12it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:30<00:00,  9.86it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1759 | Val F1: 0.1399 | Gap: 0.0360 | EM: 0.0633\n",
      "✓ SAVED! Best F1: 0.1399\n",
      "\n",
      "======================================================================\n",
      "EPOCH 15/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 15: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=5.532]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.6981\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:15<00:00, 12.97it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:23<00:00, 12.90it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.2096 | Val F1: 0.1434 | Gap: 0.0662 | EM: 0.0700\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 2.5.5.5. 5.5.5\n",
      "  F1: 0.000\n",
      "✓ SAVED! Best F1: 0.1434\n",
      "\n",
      "======================================================================\n",
      "EPOCH 16/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 16: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=5.638]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.6372\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:19<00:00, 10.22it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:28<00:00, 10.38it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1826 | Val F1: 0.1541 | Gap: 0.0285 | EM: 0.0733\n",
      "✓ SAVED! Best F1: 0.1541\n",
      "\n",
      "======================================================================\n",
      "EPOCH 17/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 17: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=5.227]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.5716\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:14<00:00, 14.03it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:25<00:00, 11.73it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.2199 | Val F1: 0.1547 | Gap: 0.0652 | EM: 0.0867\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.1547\n",
      "\n",
      "======================================================================\n",
      "EPOCH 18/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 18: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.19it/s, loss=6.039]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.5018\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:15<00:00, 12.92it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:25<00:00, 11.82it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.2081 | Val F1: 0.1653 | Gap: 0.0428 | EM: 0.0933\n",
      "✓ SAVED! Best F1: 0.1653\n",
      "\n",
      "======================================================================\n",
      "EPOCH 19/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 19: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=5.845]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.4335\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:18<00:00, 10.97it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:26<00:00, 11.24it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.2056 | Val F1: 0.1904 | Gap: 0.0152 | EM: 0.0967\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.\n",
      "  F1: 0.000\n",
      "✓ SAVED! Best F1: 0.1904\n",
      "\n",
      "======================================================================\n",
      "EPOCH 20/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 20: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.19it/s, loss=5.477]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.3561\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:15<00:00, 13.09it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:24<00:00, 12.15it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.2190 | Val F1: 0.1779 | Gap: 0.0410 | EM: 0.1000\n",
      "\n",
      "======================================================================\n",
      "EPOCH 21/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 21: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=5.210]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.2619\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:20<00:00,  9.79it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:34<00:00,  8.82it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.2345 | Val F1: 0.1655 | Gap: 0.0690 | EM: 0.1000\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.5.\n",
      "  F1: 0.000\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 22/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 22: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.19it/s, loss=5.373]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.1428\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:20<00:00,  9.62it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:27<00:00, 10.83it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.2249 | Val F1: 0.1870 | Gap: 0.0379 | EM: 0.1067\n",
      "\n",
      "======================================================================\n",
      "EPOCH 23/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 23: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.19it/s, loss=5.353]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 4.9874\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:19<00:00, 10.18it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:30<00:00,  9.86it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.2434 | Val F1: 0.2120 | Gap: 0.0313 | EM: 0.1233\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 5\n",
      "  F1: 0.000\n",
      "✓ SAVED! Best F1: 0.2120\n",
      "\n",
      "======================================================================\n",
      "EPOCH 24/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 24: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=5.059]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 4.8158\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:16<00:00, 11.83it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:22<00:00, 13.52it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.2511 | Val F1: 0.2414 | Gap: 0.0098 | EM: 0.1333\n",
      "✓ SAVED! Best F1: 0.2414\n",
      "\n",
      "======================================================================\n",
      "EPOCH 25/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 25: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=4.569]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 4.6526\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:19<00:00, 10.44it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:23<00:00, 13.02it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.2647 | Val F1: 0.2499 | Gap: 0.0148 | EM: 0.1300\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.2499\n",
      "\n",
      "======================================================================\n",
      "EPOCH 26/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 26: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=4.458]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 4.5073\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:14<00:00, 13.55it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:22<00:00, 13.41it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.3208 | Val F1: 0.2515 | Gap: 0.0693 | EM: 0.1400\n",
      "✓ SAVED! Best F1: 0.2515\n",
      "\n",
      "======================================================================\n",
      "EPOCH 27/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 27: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=4.382]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 4.3629\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:15<00:00, 13.10it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:21<00:00, 13.91it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.3075 | Val F1: 0.2587 | Gap: 0.0488 | EM: 0.1367\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "✓ SAVED! Best F1: 0.2587\n",
      "\n",
      "======================================================================\n",
      "EPOCH 28/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 28: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.19it/s, loss=3.857]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 4.2409\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:14<00:00, 13.78it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:20<00:00, 14.40it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.3393 | Val F1: 0.2793 | Gap: 0.0599 | EM: 0.1600\n",
      "✓ SAVED! Best F1: 0.2793\n",
      "\n",
      "======================================================================\n",
      "EPOCH 29/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 29: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=4.021]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 4.1192\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:10<00:00, 18.87it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:17<00:00, 16.86it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.3407 | Val F1: 0.2902 | Gap: 0.0506 | EM: 0.1767\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 5\n",
      "  F1: 0.000\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.2902\n",
      "\n",
      "======================================================================\n",
      "EPOCH 30/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 30: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=3.656]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 4.0176\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:13<00:00, 15.37it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:18<00:00, 15.94it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.3680 | Val F1: 0.3199 | Gap: 0.0482 | EM: 0.1767\n",
      "✓ SAVED! Best F1: 0.3199\n",
      "\n",
      "======================================================================\n",
      "EPOCH 31/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 31: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=4.036]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.9219\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:12<00:00, 15.56it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:19<00:00, 15.42it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.3775 | Val F1: 0.3244 | Gap: 0.0530 | EM: 0.1867\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5 gig atons\n",
      "  F1: 0.400\n",
      "✓ SAVED! Best F1: 0.3244\n",
      "\n",
      "======================================================================\n",
      "EPOCH 32/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 32: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=3.786]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.8430\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:11<00:00, 16.77it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:19<00:00, 15.63it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.3589 | Val F1: 0.3320 | Gap: 0.0269 | EM: 0.1867\n",
      "✓ SAVED! Best F1: 0.3320\n",
      "\n",
      "======================================================================\n",
      "EPOCH 33/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 33: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=3.649]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.7573\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:14<00:00, 14.20it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:20<00:00, 14.60it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.3955 | Val F1: 0.3573 | Gap: 0.0382 | EM: 0.1967\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.3573\n",
      "\n",
      "======================================================================\n",
      "EPOCH 34/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 34: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=4.178]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.6923\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:11<00:00, 17.76it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:19<00:00, 15.66it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4039 | Val F1: 0.3220 | Gap: 0.0819 | EM: 0.1767\n",
      "\n",
      "======================================================================\n",
      "EPOCH 35/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 35: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=3.829]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.6245\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:13<00:00, 15.21it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:19<00:00, 15.19it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.3817 | Val F1: 0.3553 | Gap: 0.0264 | EM: 0.2067\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 5\n",
      "  F1: 0.000\n",
      "\n",
      "======================================================================\n",
      "EPOCH 36/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 36: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.19it/s, loss=3.428]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.5597\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:11<00:00, 16.72it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:17<00:00, 16.88it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4063 | Val F1: 0.3615 | Gap: 0.0448 | EM: 0.2167\n",
      "✓ SAVED! Best F1: 0.3615\n",
      "\n",
      "======================================================================\n",
      "EPOCH 37/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 37: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=4.310]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.5022\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:10<00:00, 18.52it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:17<00:00, 17.18it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4551 | Val F1: 0.3795 | Gap: 0.0757 | EM: 0.2100\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.3795\n",
      "\n",
      "======================================================================\n",
      "EPOCH 38/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 38: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=3.414]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.4439\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:09<00:00, 20.13it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:15<00:00, 19.99it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4211 | Val F1: 0.4234 | Gap: -0.0023 | EM: 0.2567\n",
      "✓ SAVED! Best F1: 0.4234\n",
      "\n",
      "======================================================================\n",
      "EPOCH 39/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 39: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=3.548]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.3923\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 22.54it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 21.57it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5138 | Val F1: 0.4204 | Gap: 0.0934 | EM: 0.2500\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 40/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 40: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=3.468]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.3431\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:09<00:00, 20.08it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:15<00:00, 19.50it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4799 | Val F1: 0.4022 | Gap: 0.0777 | EM: 0.2433\n",
      "\n",
      "======================================================================\n",
      "EPOCH 41/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 41: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.19it/s, loss=3.176]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.2968\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 23.32it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 22.26it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4986 | Val F1: 0.4045 | Gap: 0.0941 | EM: 0.2300\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 42/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 42: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=3.547]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.2581\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:10<00:00, 19.60it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:15<00:00, 18.84it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4644 | Val F1: 0.4108 | Gap: 0.0536 | EM: 0.2467\n",
      "\n",
      "======================================================================\n",
      "EPOCH 43/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 43: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=3.357]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.2214\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:10<00:00, 19.49it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:14<00:00, 20.06it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5130 | Val F1: 0.4431 | Gap: 0.0699 | EM: 0.2633\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "✓ SAVED! Best F1: 0.4431\n",
      "\n",
      "======================================================================\n",
      "EPOCH 44/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 44: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.871]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.1802\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 24.80it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:14<00:00, 21.08it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4778 | Val F1: 0.4344 | Gap: 0.0434 | EM: 0.2400\n",
      "\n",
      "======================================================================\n",
      "EPOCH 45/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 45: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=3.089]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.1487\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 24.83it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 25.36it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5694 | Val F1: 0.4656 | Gap: 0.1038 | EM: 0.2800\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.4656\n",
      "\n",
      "======================================================================\n",
      "EPOCH 46/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 46: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.900]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.1154\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 25.68it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 22.36it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5326 | Val F1: 0.4570 | Gap: 0.0757 | EM: 0.2633\n",
      "\n",
      "======================================================================\n",
      "EPOCH 47/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 47: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=3.383]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.0886\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 23.41it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 26.65it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5710 | Val F1: 0.4785 | Gap: 0.0925 | EM: 0.2767\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "✓ SAVED! Best F1: 0.4785\n",
      "\n",
      "======================================================================\n",
      "EPOCH 48/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 48: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=3.516]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.0621\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 27.68it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 22.77it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5588 | Val F1: 0.4442 | Gap: 0.1146 | EM: 0.2600\n",
      "\n",
      "======================================================================\n",
      "EPOCH 49/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 49: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.627]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.0308\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 23.19it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:15<00:00, 19.54it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5400 | Val F1: 0.4618 | Gap: 0.0782 | EM: 0.2767\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 50/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 50: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.990]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.0100\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 27.06it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 22.76it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5612 | Val F1: 0.4264 | Gap: 0.1347 | EM: 0.2367\n",
      "\n",
      "======================================================================\n",
      "EPOCH 51/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 51: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=3.031]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.9890\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 26.72it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 21.78it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5952 | Val F1: 0.4585 | Gap: 0.1367 | EM: 0.2833\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 52/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 52: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.767]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.9622\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:09<00:00, 22.12it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:15<00:00, 19.65it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5838 | Val F1: 0.4614 | Gap: 0.1224 | EM: 0.2733\n",
      "\n",
      "======================================================================\n",
      "EPOCH 53/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 53: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.19it/s, loss=2.779]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.9383\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 22.77it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:14<00:00, 21.26it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5905 | Val F1: 0.4837 | Gap: 0.1068 | EM: 0.2933\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.4837\n",
      "\n",
      "======================================================================\n",
      "EPOCH 54/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 54: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.852]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.9239\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 29.46it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 26.02it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5929 | Val F1: 0.4706 | Gap: 0.1223 | EM: 0.2867\n",
      "\n",
      "======================================================================\n",
      "EPOCH 55/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 55: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.19it/s, loss=2.731]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.9016\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:09<00:00, 20.77it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:14<00:00, 20.05it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6024 | Val F1: 0.4523 | Gap: 0.1501 | EM: 0.2733\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 56/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 56: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.902]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.8788\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 23.15it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:14<00:00, 21.10it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6283 | Val F1: 0.5092 | Gap: 0.1190 | EM: 0.3233\n",
      "✓ SAVED! Best F1: 0.5092\n",
      "\n",
      "======================================================================\n",
      "EPOCH 57/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 57: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.19it/s, loss=2.996]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.8609\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 25.54it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 23.00it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6017 | Val F1: 0.4845 | Gap: 0.1172 | EM: 0.2833\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 58/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 58: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.981]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.8452\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 27.66it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 25.01it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6181 | Val F1: 0.4644 | Gap: 0.1537 | EM: 0.2767\n",
      "\n",
      "======================================================================\n",
      "EPOCH 59/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 59: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.830]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.8236\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 27.85it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 25.78it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6019 | Val F1: 0.4847 | Gap: 0.1172 | EM: 0.2967\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 60/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 60: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.761]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.8098\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 27.49it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 24.61it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6652 | Val F1: 0.5365 | Gap: 0.1287 | EM: 0.3200\n",
      "✓ SAVED! Best F1: 0.5365\n",
      "\n",
      "======================================================================\n",
      "EPOCH 61/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 61: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.19it/s, loss=2.727]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.7913\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 28.22it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 25.49it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6333 | Val F1: 0.5011 | Gap: 0.1322 | EM: 0.3000\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 62/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 62: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.629]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.7761\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 25.15it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 21.90it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6572 | Val F1: 0.5157 | Gap: 0.1415 | EM: 0.3100\n",
      "\n",
      "======================================================================\n",
      "EPOCH 63/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 63: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.19it/s, loss=2.856]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.7631\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 25.53it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 21.64it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6228 | Val F1: 0.4934 | Gap: 0.1295 | EM: 0.3067\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 64/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 64: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.733]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.7456\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 26.39it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 22.81it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6529 | Val F1: 0.4995 | Gap: 0.1534 | EM: 0.3100\n",
      "\n",
      "======================================================================\n",
      "EPOCH 65/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 65: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.513]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.7284\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 25.40it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 22.45it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6499 | Val F1: 0.4931 | Gap: 0.1568 | EM: 0.2800\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 66/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 66: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.854]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.7171\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 31.31it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 26.69it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6734 | Val F1: 0.5193 | Gap: 0.1541 | EM: 0.3133\n",
      "\n",
      "======================================================================\n",
      "EPOCH 67/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 67: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.711]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.7021\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 30.99it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 27.05it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6496 | Val F1: 0.5054 | Gap: 0.1441 | EM: 0.3033\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 68/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 68: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.604]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.6906\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 26.11it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 23.50it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6859 | Val F1: 0.5053 | Gap: 0.1806 | EM: 0.2933\n",
      "\n",
      "======================================================================\n",
      "EPOCH 69/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 69: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.693]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.6780\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 28.41it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 25.61it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6961 | Val F1: 0.5035 | Gap: 0.1926 | EM: 0.2967\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 70/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 70: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.795]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.6648\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 26.82it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 24.22it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6495 | Val F1: 0.5149 | Gap: 0.1346 | EM: 0.2867\n",
      "\n",
      "======================================================================\n",
      "EPOCH 71/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 71: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.589]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.6502\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 25.02it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 22.28it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6731 | Val F1: 0.5168 | Gap: 0.1563 | EM: 0.2967\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 72/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 72: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.619]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.6345\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 27.24it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 23.13it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6628 | Val F1: 0.5097 | Gap: 0.1530 | EM: 0.3100\n",
      "\n",
      "======================================================================\n",
      "EPOCH 73/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 73: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.646]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.6256\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 30.97it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 26.27it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6709 | Val F1: 0.5052 | Gap: 0.1657 | EM: 0.3133\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 74/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 74: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.609]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.6143\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 32.28it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 24.44it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6555 | Val F1: 0.5094 | Gap: 0.1460 | EM: 0.3033\n",
      "\n",
      "======================================================================\n",
      "EPOCH 75/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 75: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.19it/s, loss=2.643]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.6019\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:05<00:00, 35.13it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:09<00:00, 30.06it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7184 | Val F1: 0.5408 | Gap: 0.1776 | EM: 0.3333\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "✓ SAVED! Best F1: 0.5408\n",
      "\n",
      "======================================================================\n",
      "EPOCH 76/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 76: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.542]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5901\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 27.61it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 25.20it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7096 | Val F1: 0.5177 | Gap: 0.1919 | EM: 0.3333\n",
      "\n",
      "======================================================================\n",
      "EPOCH 77/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 77: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.677]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5770\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 28.18it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 26.40it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7169 | Val F1: 0.5370 | Gap: 0.1800 | EM: 0.3200\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 78/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 78: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.19it/s, loss=2.615]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5653\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 32.15it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 26.63it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7386 | Val F1: 0.5670 | Gap: 0.1716 | EM: 0.3600\n",
      "✓ SAVED! Best F1: 0.5670\n",
      "\n",
      "======================================================================\n",
      "EPOCH 79/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 79: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.623]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5573\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 30.66it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 25.34it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6761 | Val F1: 0.5324 | Gap: 0.1437 | EM: 0.3167\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 80/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 80: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.438]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5471\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 29.90it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 26.92it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7074 | Val F1: 0.5152 | Gap: 0.1921 | EM: 0.3033\n",
      "\n",
      "======================================================================\n",
      "EPOCH 81/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 81: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.522]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5358\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 27.58it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 23.68it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7427 | Val F1: 0.5596 | Gap: 0.1831 | EM: 0.3467\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 82/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 82: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.423]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5264\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 26.92it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 24.69it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7339 | Val F1: 0.5657 | Gap: 0.1682 | EM: 0.3467\n",
      "\n",
      "======================================================================\n",
      "EPOCH 83/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 83: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.432]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5186\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 31.00it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:10<00:00, 28.21it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7172 | Val F1: 0.5425 | Gap: 0.1747 | EM: 0.3300\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 84/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 84: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.854]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5052\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 31.04it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 25.24it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6987 | Val F1: 0.5371 | Gap: 0.1617 | EM: 0.3233\n",
      "\n",
      "======================================================================\n",
      "EPOCH 85/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 85: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.609]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4973\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 31.22it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 26.63it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7499 | Val F1: 0.5516 | Gap: 0.1984 | EM: 0.3500\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 86/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 86: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.19it/s, loss=2.427]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4876\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 28.08it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 24.54it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7203 | Val F1: 0.5661 | Gap: 0.1542 | EM: 0.3500\n",
      "\n",
      "======================================================================\n",
      "EPOCH 87/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 87: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.445]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4787\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 31.06it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 23.92it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7255 | Val F1: 0.5227 | Gap: 0.2028 | EM: 0.2833\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 88/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 88: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.19it/s, loss=2.396]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4677\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 32.65it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:10<00:00, 27.83it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7588 | Val F1: 0.5512 | Gap: 0.2077 | EM: 0.3333\n",
      "\n",
      "======================================================================\n",
      "EPOCH 89/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 89: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.398]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4618\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 32.95it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:10<00:00, 28.41it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7496 | Val F1: 0.5646 | Gap: 0.1850 | EM: 0.3367\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 90/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 90: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.477]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4557\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:05<00:00, 33.87it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:10<00:00, 28.23it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7775 | Val F1: 0.5684 | Gap: 0.2091 | EM: 0.3500\n",
      "✓ SAVED! Best F1: 0.5684\n",
      "\n",
      "======================================================================\n",
      "EPOCH 91/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 91: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.445]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4419\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 31.15it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 26.69it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7641 | Val F1: 0.5787 | Gap: 0.1854 | EM: 0.3567\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "✓ SAVED! Best F1: 0.5787\n",
      "\n",
      "======================================================================\n",
      "EPOCH 92/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 92: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.319]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4352\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 29.81it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:10<00:00, 28.32it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7501 | Val F1: 0.5707 | Gap: 0.1794 | EM: 0.3567\n",
      "\n",
      "======================================================================\n",
      "EPOCH 93/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 93: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.19it/s, loss=2.246]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4273\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 30.27it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 25.99it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7386 | Val F1: 0.5662 | Gap: 0.1723 | EM: 0.3500\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 5\n",
      "  F1: 0.000\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 94/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 94: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.411]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4193\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 32.19it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 25.74it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7460 | Val F1: 0.5545 | Gap: 0.1914 | EM: 0.3233\n",
      "\n",
      "======================================================================\n",
      "EPOCH 95/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 95: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.412]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4128\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 32.47it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:10<00:00, 27.79it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7875 | Val F1: 0.5776 | Gap: 0.2099 | EM: 0.3600\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 96/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 96: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.667]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4036\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:05<00:00, 35.77it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:09<00:00, 30.56it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7602 | Val F1: 0.5792 | Gap: 0.1810 | EM: 0.3667\n",
      "✓ SAVED! Best F1: 0.5792\n",
      "\n",
      "======================================================================\n",
      "EPOCH 97/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 97: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.18it/s, loss=2.374]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.3965\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 32.32it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 26.55it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7549 | Val F1: 0.5567 | Gap: 0.1982 | EM: 0.3367\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 98/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 98: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.281]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.3892\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 30.92it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 26.70it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7673 | Val F1: 0.5692 | Gap: 0.1981 | EM: 0.3467\n",
      "\n",
      "======================================================================\n",
      "EPOCH 99/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 99: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.327]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.3820\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 31.35it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:10<00:00, 27.54it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7932 | Val F1: 0.5770 | Gap: 0.2162 | EM: 0.3500\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 100/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 100: 100%|████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.228]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.3731\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 29.66it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 26.54it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7590 | Val F1: 0.5516 | Gap: 0.2074 | EM: 0.3367\n",
      "\n",
      "======================================================================\n",
      "FINAL RESULTS\n",
      "======================================================================\n",
      "Best Val F1: 57.9%\n",
      "Final Val F1: 55.2%\n",
      "Final EM: 33.7%\n",
      "Train-Val Gap: 0.2074\n",
      "Training for seed 1238\n",
      "Initializing token embeddings with GloVe...\n",
      "✓ Token embeddings initialized with GloVe\n",
      "Total parameters: 21.7M\n",
      "Trainable parameters: 21.7M\n",
      "\n",
      "======================================================================\n",
      "TESTING Q/K HYPOTHESIS - Q/K LR = 20x\n",
      "======================================================================\n",
      "\n",
      "Q/K params: 1.1M\n",
      "Other params: 5.5M\n",
      "\n",
      "\n",
      "======================================================================\n",
      "EPOCH 1/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 1: 100%|██████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=8.212]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 11.4279\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:10<00:00, 19.78it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:17<00:00, 17.00it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.0020 | Val F1: 0.0102 | Gap: -0.0082 | EM: 0.0033\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 6,000\n",
      "  F1: 0.000\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.0102\n",
      "\n",
      "======================================================================\n",
      "EPOCH 2/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 2: 100%|██████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=7.387]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 7.5426\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:18<00:00, 10.78it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:28<00:00, 10.46it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.0187 | Val F1: 0.0125 | Gap: 0.0062 | EM: 0.0033\n",
      "✓ SAVED! Best F1: 0.0125\n",
      "\n",
      "======================================================================\n",
      "EPOCH 3/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 3: 100%|██████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=6.694]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 7.0854\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:14<00:00, 13.87it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:20<00:00, 14.56it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.0337 | Val F1: 0.0136 | Gap: 0.0201 | EM: 0.0033\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 10\n",
      "  F1: 0.000\n",
      "✓ SAVED! Best F1: 0.0136\n",
      "\n",
      "======================================================================\n",
      "EPOCH 4/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 4: 100%|██████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=6.952]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 6.8268\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:19<00:00, 10.23it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:31<00:00,  9.56it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.0484 | Val F1: 0.0302 | Gap: 0.0182 | EM: 0.0033\n",
      "✓ SAVED! Best F1: 0.0302\n",
      "\n",
      "======================================================================\n",
      "EPOCH 5/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 5: 100%|██████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=6.424]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 6.6438\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:17<00:00, 11.36it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:29<00:00, 10.19it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.0582 | Val F1: 0.0466 | Gap: 0.0115 | EM: 0.0067\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,\n",
      "  F1: 0.000\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.0466\n",
      "\n",
      "======================================================================\n",
      "EPOCH 6/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 6: 100%|██████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=6.337]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 6.4944\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:16<00:00, 12.24it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:27<00:00, 10.85it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.0529 | Val F1: 0.0530 | Gap: -0.0001 | EM: 0.0133\n",
      "✓ SAVED! Best F1: 0.0530\n",
      "\n",
      "======================================================================\n",
      "EPOCH 7/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 7: 100%|██████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=6.200]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 6.3647\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:16<00:00, 12.17it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:26<00:00, 11.47it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.0644 | Val F1: 0.0619 | Gap: 0.0025 | EM: 0.0167\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,\n",
      "  F1: 0.000\n",
      "✓ SAVED! Best F1: 0.0619\n",
      "\n",
      "======================================================================\n",
      "EPOCH 8/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 8: 100%|██████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=6.000]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 6.2615\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:17<00:00, 11.43it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:23<00:00, 12.85it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.0890 | Val F1: 0.0763 | Gap: 0.0127 | EM: 0.0267\n",
      "✓ SAVED! Best F1: 0.0763\n",
      "\n",
      "======================================================================\n",
      "EPOCH 9/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 9: 100%|██████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=6.088]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 6.1639\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:19<00:00, 10.51it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:28<00:00, 10.42it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.0828 | Val F1: 0.0931 | Gap: -0.0103 | EM: 0.0367\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 2\n",
      "  F1: 0.000\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.0931\n",
      "\n",
      "======================================================================\n",
      "EPOCH 10/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 10: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=5.973]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 6.0754\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:17<00:00, 11.22it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:25<00:00, 11.92it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.0943 | Val F1: 0.0916 | Gap: 0.0027 | EM: 0.0367\n",
      "\n",
      "======================================================================\n",
      "EPOCH 11/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 11: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=6.395]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.9907\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:20<00:00,  9.83it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:31<00:00,  9.54it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1310 | Val F1: 0.0979 | Gap: 0.0330 | EM: 0.0433\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n",
      "  F1: 0.000\n",
      "✓ SAVED! Best F1: 0.0979\n",
      "\n",
      "======================================================================\n",
      "EPOCH 12/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 12: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=6.293]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.9192\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:19<00:00, 10.41it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:30<00:00,  9.73it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1385 | Val F1: 0.0998 | Gap: 0.0387 | EM: 0.0533\n",
      "✓ SAVED! Best F1: 0.0998\n",
      "\n",
      "======================================================================\n",
      "EPOCH 13/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 13: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=6.317]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.8471\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:21<00:00,  9.47it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:29<00:00, 10.33it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1307 | Val F1: 0.1209 | Gap: 0.0098 | EM: 0.0567\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 2\n",
      "  F1: 0.000\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.1209\n",
      "\n",
      "======================================================================\n",
      "EPOCH 14/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 14: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=5.885]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.7755\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:15<00:00, 12.85it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:25<00:00, 11.74it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1626 | Val F1: 0.1246 | Gap: 0.0380 | EM: 0.0567\n",
      "✓ SAVED! Best F1: 0.1246\n",
      "\n",
      "======================================================================\n",
      "EPOCH 15/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 15: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=5.854]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.7114\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:17<00:00, 11.53it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:28<00:00, 10.67it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1405 | Val F1: 0.1487 | Gap: -0.0081 | EM: 0.0767\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 2, 3, 3\n",
      "  F1: 0.000\n",
      "✓ SAVED! Best F1: 0.1487\n",
      "\n",
      "======================================================================\n",
      "EPOCH 16/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 16: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=4.998]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.6416\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:18<00:00, 11.00it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:27<00:00, 11.04it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1695 | Val F1: 0.1487 | Gap: 0.0208 | EM: 0.0867\n",
      "✓ SAVED! Best F1: 0.1487\n",
      "\n",
      "======================================================================\n",
      "EPOCH 17/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 17: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=5.628]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.5787\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:23<00:00,  8.48it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:34<00:00,  8.74it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1719 | Val F1: 0.1611 | Gap: 0.0109 | EM: 0.0867\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 2\n",
      "  F1: 0.000\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.1611\n",
      "\n",
      "======================================================================\n",
      "EPOCH 18/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 18: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=5.778]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.5047\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:16<00:00, 11.99it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:23<00:00, 12.51it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1840 | Val F1: 0.1917 | Gap: -0.0077 | EM: 0.1033\n",
      "✓ SAVED! Best F1: 0.1917\n",
      "\n",
      "======================================================================\n",
      "EPOCH 19/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 19: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=4.894]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.4269\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:18<00:00, 11.06it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:27<00:00, 11.05it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1786 | Val F1: 0.1568 | Gap: 0.0218 | EM: 0.0700\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 2\n",
      "  F1: 0.000\n",
      "\n",
      "======================================================================\n",
      "EPOCH 20/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 20: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=5.854]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.3217\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:16<00:00, 12.10it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:24<00:00, 12.10it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.2183 | Val F1: 0.1730 | Gap: 0.0453 | EM: 0.0933\n",
      "\n",
      "======================================================================\n",
      "EPOCH 21/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 21: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=4.847]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.2029\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:18<00:00, 10.66it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:25<00:00, 11.79it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.1734 | Val F1: 0.1846 | Gap: -0.0112 | EM: 0.0900\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 2\n",
      "  F1: 0.000\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 22/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 22: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=4.810]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 5.0482\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:18<00:00, 10.72it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:23<00:00, 12.93it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.2018 | Val F1: 0.2082 | Gap: -0.0064 | EM: 0.1067\n",
      "✓ SAVED! Best F1: 0.2082\n",
      "\n",
      "======================================================================\n",
      "EPOCH 23/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 23: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=4.878]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 4.8882\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:16<00:00, 12.19it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:25<00:00, 11.92it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.2685 | Val F1: 0.2153 | Gap: 0.0532 | EM: 0.1133\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 2\n",
      "  F1: 0.000\n",
      "✓ SAVED! Best F1: 0.2153\n",
      "\n",
      "======================================================================\n",
      "EPOCH 24/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 24: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=4.638]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 4.7341\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:16<00:00, 11.87it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:20<00:00, 14.99it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.2458 | Val F1: 0.2312 | Gap: 0.0146 | EM: 0.1267\n",
      "✓ SAVED! Best F1: 0.2312\n",
      "\n",
      "======================================================================\n",
      "EPOCH 25/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 25: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=4.412]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 4.5771\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:12<00:00, 16.13it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:19<00:00, 15.12it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.3151 | Val F1: 0.2584 | Gap: 0.0567 | EM: 0.1400\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 2\n",
      "  F1: 0.000\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.2584\n",
      "\n",
      "======================================================================\n",
      "EPOCH 26/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 26: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=3.960]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 4.4350\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:14<00:00, 13.94it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:26<00:00, 11.27it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.3073 | Val F1: 0.2927 | Gap: 0.0145 | EM: 0.1667\n",
      "✓ SAVED! Best F1: 0.2927\n",
      "\n",
      "======================================================================\n",
      "EPOCH 27/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 27: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=4.322]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 4.3030\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:13<00:00, 15.23it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:17<00:00, 17.19it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.3070 | Val F1: 0.2994 | Gap: 0.0076 | EM: 0.1667\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 5\n",
      "  F1: 0.000\n",
      "✓ SAVED! Best F1: 0.2994\n",
      "\n",
      "======================================================================\n",
      "EPOCH 28/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 28: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=4.782]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 4.1713\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:12<00:00, 15.57it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:17<00:00, 17.22it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.2963 | Val F1: 0.2925 | Gap: 0.0038 | EM: 0.1567\n",
      "\n",
      "======================================================================\n",
      "EPOCH 29/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 29: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=3.638]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 4.0549\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:13<00:00, 14.94it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:16<00:00, 18.38it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.3275 | Val F1: 0.3173 | Gap: 0.0103 | EM: 0.1933\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 5\n",
      "  F1: 0.000\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.3173\n",
      "\n",
      "======================================================================\n",
      "EPOCH 30/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 30: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=3.486]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.9555\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:09<00:00, 20.43it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 21.88it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.3573 | Val F1: 0.3348 | Gap: 0.0225 | EM: 0.1900\n",
      "✓ SAVED! Best F1: 0.3348\n",
      "\n",
      "======================================================================\n",
      "EPOCH 31/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 31: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=4.073]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.8654\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:12<00:00, 16.53it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:14<00:00, 20.83it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.3738 | Val F1: 0.3372 | Gap: 0.0366 | EM: 0.1700\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 3\n",
      "  F1: 0.000\n",
      "✓ SAVED! Best F1: 0.3372\n",
      "\n",
      "======================================================================\n",
      "EPOCH 32/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 32: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=4.029]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.7839\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:09<00:00, 20.71it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:18<00:00, 16.08it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4106 | Val F1: 0.3642 | Gap: 0.0464 | EM: 0.1967\n",
      "✓ SAVED! Best F1: 0.3642\n",
      "\n",
      "======================================================================\n",
      "EPOCH 33/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 33: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=3.803]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.7084\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:09<00:00, 21.12it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:16<00:00, 17.76it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.3977 | Val F1: 0.3904 | Gap: 0.0073 | EM: 0.2067\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 3\n",
      "  F1: 0.000\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.3904\n",
      "\n",
      "======================================================================\n",
      "EPOCH 34/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 34: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=3.988]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.6426\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:11<00:00, 17.39it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:20<00:00, 14.47it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.3970 | Val F1: 0.3921 | Gap: 0.0050 | EM: 0.2133\n",
      "✓ SAVED! Best F1: 0.3921\n",
      "\n",
      "======================================================================\n",
      "EPOCH 35/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 35: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=3.403]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.5743\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:12<00:00, 16.37it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:19<00:00, 15.28it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.3720 | Val F1: 0.3683 | Gap: 0.0036 | EM: 0.2000\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 36/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 36: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=3.435]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.5144\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:11<00:00, 17.21it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:16<00:00, 18.67it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4001 | Val F1: 0.3846 | Gap: 0.0155 | EM: 0.2000\n",
      "\n",
      "======================================================================\n",
      "EPOCH 37/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 37: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=3.505]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.4621\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 23.00it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 23.05it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4133 | Val F1: 0.4135 | Gap: -0.0002 | EM: 0.2233\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.4135\n",
      "\n",
      "======================================================================\n",
      "EPOCH 38/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 38: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=3.542]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.4074\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 24.37it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 25.76it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4278 | Val F1: 0.4384 | Gap: -0.0105 | EM: 0.2600\n",
      "✓ SAVED! Best F1: 0.4384\n",
      "\n",
      "======================================================================\n",
      "EPOCH 39/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 39: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=3.310]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.3594\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 24.59it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:14<00:00, 20.40it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4886 | Val F1: 0.4406 | Gap: 0.0480 | EM: 0.2333\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "✓ SAVED! Best F1: 0.4406\n",
      "\n",
      "======================================================================\n",
      "EPOCH 40/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 40: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=3.421]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.3153\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:09<00:00, 20.98it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 23.58it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4659 | Val F1: 0.4287 | Gap: 0.0372 | EM: 0.2600\n",
      "\n",
      "======================================================================\n",
      "EPOCH 41/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 41: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=3.339]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.2687\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 24.07it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:17<00:00, 17.59it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4866 | Val F1: 0.4331 | Gap: 0.0536 | EM: 0.2467\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 42/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 42: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.939]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.2300\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 22.48it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:15<00:00, 19.91it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4996 | Val F1: 0.4339 | Gap: 0.0658 | EM: 0.2500\n",
      "\n",
      "======================================================================\n",
      "EPOCH 43/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 43: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=3.448]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.1954\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 22.29it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:14<00:00, 20.15it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4977 | Val F1: 0.4518 | Gap: 0.0459 | EM: 0.2567\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "✓ SAVED! Best F1: 0.4518\n",
      "\n",
      "======================================================================\n",
      "EPOCH 44/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 44: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=3.052]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.1563\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 24.42it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 24.52it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4944 | Val F1: 0.4626 | Gap: 0.0317 | EM: 0.2800\n",
      "✓ SAVED! Best F1: 0.4626\n",
      "\n",
      "======================================================================\n",
      "EPOCH 45/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 45: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=3.343]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.1286\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 25.34it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 22.54it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.4765 | Val F1: 0.4086 | Gap: 0.0679 | EM: 0.2467\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 5\n",
      "  F1: 0.000\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 46/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 46: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=3.229]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.0941\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 24.36it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 25.70it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5275 | Val F1: 0.4882 | Gap: 0.0393 | EM: 0.2867\n",
      "✓ SAVED! Best F1: 0.4882\n",
      "\n",
      "======================================================================\n",
      "EPOCH 47/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 47: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=3.003]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.0671\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 23.96it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:14<00:00, 20.63it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5106 | Val F1: 0.4539 | Gap: 0.0568 | EM: 0.2833\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 48/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 48: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=3.241]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.0350\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 23.08it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:15<00:00, 19.68it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5238 | Val F1: 0.4526 | Gap: 0.0712 | EM: 0.2733\n",
      "\n",
      "======================================================================\n",
      "EPOCH 49/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 49: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=3.014]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 3.0122\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 25.49it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 23.64it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5443 | Val F1: 0.4401 | Gap: 0.1042 | EM: 0.2667\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 50/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 50: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=3.266]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.9899\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:09<00:00, 20.55it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 21.86it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5400 | Val F1: 0.4718 | Gap: 0.0681 | EM: 0.2733\n",
      "\n",
      "======================================================================\n",
      "EPOCH 51/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 51: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=3.012]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.9623\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 22.69it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 22.71it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5472 | Val F1: 0.4884 | Gap: 0.0588 | EM: 0.3100\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "✓ SAVED! Best F1: 0.4884\n",
      "\n",
      "======================================================================\n",
      "EPOCH 52/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 52: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=3.086]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.9414\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 23.90it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:14<00:00, 20.53it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5956 | Val F1: 0.4679 | Gap: 0.1277 | EM: 0.2833\n",
      "\n",
      "======================================================================\n",
      "EPOCH 53/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 53: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.646]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.9176\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 23.88it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 22.92it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5936 | Val F1: 0.4753 | Gap: 0.1183 | EM: 0.2967\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 54/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 54: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.856]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.8976\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 23.48it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:14<00:00, 20.40it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5793 | Val F1: 0.4546 | Gap: 0.1247 | EM: 0.2700\n",
      "\n",
      "======================================================================\n",
      "EPOCH 55/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 55: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=3.034]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.8752\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 26.58it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 24.21it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5940 | Val F1: 0.4888 | Gap: 0.1052 | EM: 0.3000\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "✓ SAVED! Best F1: 0.4888\n",
      "\n",
      "======================================================================\n",
      "EPOCH 56/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 56: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.19it/s, loss=2.987]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.8609\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 28.48it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 25.83it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5709 | Val F1: 0.4710 | Gap: 0.0999 | EM: 0.2933\n",
      "\n",
      "======================================================================\n",
      "EPOCH 57/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 57: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.494]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.8395\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 26.57it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 22.40it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.5759 | Val F1: 0.4878 | Gap: 0.0881 | EM: 0.3033\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 58/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 58: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.467]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.8197\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 27.25it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:14<00:00, 20.81it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6255 | Val F1: 0.5098 | Gap: 0.1156 | EM: 0.3067\n",
      "✓ SAVED! Best F1: 0.5098\n",
      "\n",
      "======================================================================\n",
      "EPOCH 59/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 59: 100%|█████████████████| 1875/1875 [05:03<00:00,  6.19it/s, loss=2.525]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.8020\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 27.03it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:14<00:00, 20.41it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6289 | Val F1: 0.5070 | Gap: 0.1219 | EM: 0.3167\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 60/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 60: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.634]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.7814\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 23.04it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 21.58it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6353 | Val F1: 0.4666 | Gap: 0.1687 | EM: 0.2900\n",
      "\n",
      "======================================================================\n",
      "EPOCH 61/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 61: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.896]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.7671\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 28.04it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 25.18it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6085 | Val F1: 0.4822 | Gap: 0.1263 | EM: 0.3000\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 62/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 62: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.875]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.7543\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 27.28it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 23.34it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6319 | Val F1: 0.5244 | Gap: 0.1075 | EM: 0.3267\n",
      "✓ SAVED! Best F1: 0.5244\n",
      "\n",
      "======================================================================\n",
      "EPOCH 63/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 63: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.706]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.7389\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 29.32it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 26.18it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6429 | Val F1: 0.5053 | Gap: 0.1377 | EM: 0.3067\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 5\n",
      "  F1: 0.000\n",
      "\n",
      "======================================================================\n",
      "EPOCH 64/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 64: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.647]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.7207\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 27.11it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:14<00:00, 21.36it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6714 | Val F1: 0.5199 | Gap: 0.1515 | EM: 0.3200\n",
      "\n",
      "======================================================================\n",
      "EPOCH 65/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 65: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.771]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.7100\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 28.69it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 23.37it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6857 | Val F1: 0.5315 | Gap: 0.1542 | EM: 0.3267\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.5315\n",
      "\n",
      "======================================================================\n",
      "EPOCH 66/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 66: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.575]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.6952\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 30.80it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 23.05it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6642 | Val F1: 0.5133 | Gap: 0.1510 | EM: 0.3033\n",
      "\n",
      "======================================================================\n",
      "EPOCH 67/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 67: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.575]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.6800\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 26.92it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:14<00:00, 20.38it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6601 | Val F1: 0.5311 | Gap: 0.1291 | EM: 0.3133\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 68/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 68: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=3.027]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.6707\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 27.17it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 22.60it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6696 | Val F1: 0.5304 | Gap: 0.1393 | EM: 0.3300\n",
      "\n",
      "======================================================================\n",
      "EPOCH 69/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 69: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.572]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.6540\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 26.73it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 22.93it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6838 | Val F1: 0.5557 | Gap: 0.1281 | EM: 0.3433\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.5557\n",
      "\n",
      "======================================================================\n",
      "EPOCH 70/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 70: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.716]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.6439\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 27.03it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 23.16it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6949 | Val F1: 0.5609 | Gap: 0.1340 | EM: 0.3500\n",
      "✓ SAVED! Best F1: 0.5609\n",
      "\n",
      "======================================================================\n",
      "EPOCH 71/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 71: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.505]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.6329\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 26.13it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 22.96it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6703 | Val F1: 0.5413 | Gap: 0.1290 | EM: 0.3300\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 72/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 72: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.555]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.6188\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 27.99it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 23.56it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7077 | Val F1: 0.5372 | Gap: 0.1705 | EM: 0.3300\n",
      "\n",
      "======================================================================\n",
      "EPOCH 73/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 73: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.700]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.6040\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 26.52it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 23.90it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7053 | Val F1: 0.5401 | Gap: 0.1652 | EM: 0.3433\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 74/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 74: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.670]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5919\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 30.17it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 24.99it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6712 | Val F1: 0.5418 | Gap: 0.1294 | EM: 0.3333\n",
      "\n",
      "======================================================================\n",
      "EPOCH 75/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 75: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.624]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5818\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 28.09it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 25.02it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6890 | Val F1: 0.5441 | Gap: 0.1450 | EM: 0.3200\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 76/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 76: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.455]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5735\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 30.33it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 25.72it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7163 | Val F1: 0.5653 | Gap: 0.1510 | EM: 0.3600\n",
      "✓ SAVED! Best F1: 0.5653\n",
      "\n",
      "======================================================================\n",
      "EPOCH 77/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 77: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.669]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5614\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 29.50it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 24.60it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7152 | Val F1: 0.5636 | Gap: 0.1516 | EM: 0.3533\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 78/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 78: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.729]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5516\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 27.23it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 24.63it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6889 | Val F1: 0.5758 | Gap: 0.1131 | EM: 0.3667\n",
      "✓ SAVED! Best F1: 0.5758\n",
      "\n",
      "======================================================================\n",
      "EPOCH 79/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 79: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.502]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5428\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 25.51it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 22.08it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7041 | Val F1: 0.5571 | Gap: 0.1469 | EM: 0.3467\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 80/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 80: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.711]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5325\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 30.29it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 26.36it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6977 | Val F1: 0.5735 | Gap: 0.1242 | EM: 0.3600\n",
      "\n",
      "======================================================================\n",
      "EPOCH 81/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 81: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.658]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5229\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 30.63it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 26.23it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7143 | Val F1: 0.5896 | Gap: 0.1247 | EM: 0.3767\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "✓ SAVED! Best F1: 0.5896\n",
      "\n",
      "======================================================================\n",
      "EPOCH 82/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 82: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.273]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5112\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 31.65it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 23.03it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6925 | Val F1: 0.5692 | Gap: 0.1233 | EM: 0.3667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 83/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 83: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.707]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.5016\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 31.02it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:10<00:00, 27.43it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7023 | Val F1: 0.5891 | Gap: 0.1132 | EM: 0.3767\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 84/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 84: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.283]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4972\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 33.07it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:10<00:00, 29.13it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6889 | Val F1: 0.5182 | Gap: 0.1707 | EM: 0.3200\n",
      "\n",
      "======================================================================\n",
      "EPOCH 85/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 85: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.631]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4842\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 28.02it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 23.81it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7165 | Val F1: 0.5520 | Gap: 0.1645 | EM: 0.3433\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 86/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 86: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.636]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4756\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 31.69it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 27.04it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6998 | Val F1: 0.5781 | Gap: 0.1217 | EM: 0.3533\n",
      "\n",
      "======================================================================\n",
      "EPOCH 87/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 87: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.349]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4667\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 26.66it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 26.35it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7164 | Val F1: 0.5791 | Gap: 0.1373 | EM: 0.3567\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 88/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 88: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.540]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4572\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 31.36it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:10<00:00, 27.27it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7299 | Val F1: 0.5714 | Gap: 0.1585 | EM: 0.3767\n",
      "\n",
      "======================================================================\n",
      "EPOCH 89/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 89: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.438]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4505\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 27.04it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 24.39it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6813 | Val F1: 0.5811 | Gap: 0.1003 | EM: 0.3667\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 90/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 90: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.543]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4420\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 27.22it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 24.07it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.6985 | Val F1: 0.5746 | Gap: 0.1238 | EM: 0.3600\n",
      "\n",
      "======================================================================\n",
      "EPOCH 91/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 91: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.524]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4331\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 30.53it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 25.65it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7438 | Val F1: 0.5889 | Gap: 0.1549 | EM: 0.3667\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 92/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 92: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.257]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4258\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 28.67it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 24.31it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7280 | Val F1: 0.5915 | Gap: 0.1365 | EM: 0.3867\n",
      "✓ SAVED! Best F1: 0.5915\n",
      "\n",
      "======================================================================\n",
      "EPOCH 93/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 93: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.413]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4200\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 29.75it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 26.37it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7263 | Val F1: 0.5862 | Gap: 0.1400 | EM: 0.3633\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 94/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 94: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.404]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4081\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 29.85it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 22.73it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7349 | Val F1: 0.5704 | Gap: 0.1645 | EM: 0.3500\n",
      "\n",
      "======================================================================\n",
      "EPOCH 95/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 95: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.473]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.4026\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:08<00:00, 24.82it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:13<00:00, 21.90it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7320 | Val F1: 0.5978 | Gap: 0.1342 | EM: 0.3733\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "✓ SAVED! Best F1: 0.5978\n",
      "\n",
      "======================================================================\n",
      "EPOCH 96/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 96: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.392]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.3958\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:07<00:00, 27.05it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 24.30it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7221 | Val F1: 0.6109 | Gap: 0.1112 | EM: 0.3967\n",
      "✓ SAVED! Best F1: 0.6109\n",
      "\n",
      "======================================================================\n",
      "EPOCH 97/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 97: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.348]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.3875\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 30.27it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 26.79it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7379 | Val F1: 0.6033 | Gap: 0.1346 | EM: 0.3767\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "Attention: 0.0108\n",
      "\n",
      "======================================================================\n",
      "EPOCH 98/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 98: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.545]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.3828\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 28.77it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:12<00:00, 24.87it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7490 | Val F1: 0.6107 | Gap: 0.1383 | EM: 0.3967\n",
      "\n",
      "======================================================================\n",
      "EPOCH 99/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 99: 100%|█████████████████| 1875/1875 [05:02<00:00,  6.20it/s, loss=2.728]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.3722\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 30.18it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:11<00:00, 26.47it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7651 | Val F1: 0.6107 | Gap: 0.1544 | EM: 0.3833\n",
      "\n",
      "Sample:\n",
      "  Q: How many tons of carbon are absorbed the Amazon in a typical...\n",
      "  True: 1.5 gigatons\n",
      "  Pred: 1.5\n",
      "  F1: 0.667\n",
      "\n",
      "======================================================================\n",
      "EPOCH 100/100\n",
      "======================================================================\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Epoch 100: 100%|████████████████| 1875/1875 [05:02<00:00,  6.19it/s, loss=2.596]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Loss: 2.3680\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████████| 200/200 [00:06<00:00, 31.10it/s]\n",
      "Eval: 100%|███████████████████████████████████| 300/300 [00:10<00:00, 27.45it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train F1: 0.7564 | Val F1: 0.5942 | Gap: 0.1622 | EM: 0.3733\n",
      "\n",
      "======================================================================\n",
      "FINAL RESULTS\n",
      "======================================================================\n",
      "Best Val F1: 61.1%\n",
      "Final Val F1: 59.4%\n",
      "Final EM: 37.3%\n",
      "Train-Val Gap: 0.1622\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "if __name__ == \"__main__\":\n",
    "    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
    "    \n",
    "    print(\"=\"*70)\n",
    "    print(\"SQUAD ANSWER GENERATION WITH GLOVE EMBEDDINGS\")\n",
    "    print(\"=\"*70)\n",
    "    #print(f\"Expected F1: 40-55% (with GloVe)\")\n",
    "    print(f\"Model: {N_LAYERS}L, {D_MODEL}d, {N_HEADS}h\")\n",
    "    print(f\"Device: {device}\")\n",
    "    print(\"=\"*70 + \"\\n\")\n",
    "    \n",
    "    # Download and load GloVe\n",
    "    glove_file = download_and_extract_glove()\n",
    "    \n",
    "    if glove_file is None:\n",
    "        print(\"\\n WARNING: Could not load GloVe embeddings\")\n",
    "        print(\"Proceeding without pretrained embeddings (expect 15-25% F1)\")\n",
    "        pretrained_embeddings = None\n",
    "    \n",
    "    # Download SQuAD datasets\n",
    "    for name in ['train-v2.0.json', 'dev-v2.0.json']:\n",
    "        if not os.path.exists(name):\n",
    "            print(f\"Downloading {name}...\")\n",
    "            urllib.request.urlretrieve(\n",
    "                f'https://rajpurkar.github.io/SQuAD-explorer/dataset/{name}', name)\n",
    "    \n",
    "    # Setup tokenizer\n",
    "    print(\"Loading tokenizer...\")\n",
    "    tokenizer = GPT2Tokenizer.from_pretrained('gpt2')\n",
    "    tokenizer.pad_token = tokenizer.eos_token\n",
    "    \n",
    "    # Load GloVe embeddings for tokenizer\n",
    "    if glove_file:\n",
    "        pretrained_embeddings = load_glove_embeddings(glove_file, tokenizer, D_MODEL)\n",
    "    else:\n",
    "        pretrained_embeddings = None\n",
    "    \n",
    "    # Load datasets\n",
    "    print(\"Loading datasets...\")\n",
    "    full_train = SQuADDataset('train-v2.0.json', tokenizer, MAX_SEQ_LEN, MAX_ANSWER_LEN)\n",
    "    full_val = SQuADDataset('dev-v2.0.json', tokenizer, MAX_SEQ_LEN, MAX_ANSWER_LEN)\n",
    "    \n",
    "    train_ds = Subset(full_train, torch.randperm(len(full_train))[:TRAIN_SUBSET_SIZE])\n",
    "    val_ds = Subset(full_val, torch.randperm(len(full_val))[:VAL_SUBSET_SIZE])\n",
    "    \n",
    "    print(f\"Train: {len(train_ds)}, Val: {len(val_ds)}\\n\")\n",
    "    \n",
    "    loader = DataLoader(train_ds, BATCH_SIZE, shuffle=True, num_workers=2, pin_memory=True)\n",
    "    \n",
    "    # Model\n",
    "    print(\"Initializing model...\")\n",
    "    n_seed = [1234,1235,1236,1237,1238]\n",
    "    for seed_ in n_seed:\n",
    "        print(\"Training for seed\", seed_)\n",
    "        torch.manual_seed(seed_)\n",
    "        model = GPTAnswerGenerator(\n",
    "            vocab_size=tokenizer.vocab_size,\n",
    "            d_model=D_MODEL,\n",
    "            n_heads=N_HEADS,\n",
    "            n_layers=N_LAYERS,\n",
    "            d_ff=D_FF,\n",
    "            max_seq_len=MAX_SEQ_LEN,\n",
    "            dropout=DROPOUT,\n",
    "            pretrained_embeddings=pretrained_embeddings\n",
    "        ).to(device)\n",
    "\n",
    "        total_params = sum(p.numel() for p in model.parameters()) / 1e6\n",
    "        trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad) / 1e6\n",
    "        print(f\"Total parameters: {total_params:.1f}M\")\n",
    "        print(f\"Trainable parameters: {trainable_params:.1f}M\\n\")\n",
    "\n",
    "        # Optimizer with differential learning rates for embeddings\n",
    "        if TEST_QK_HYPOTHESIS:\n",
    "            print(\"=\"*70)\n",
    "            print(f\"TESTING Q/K HYPOTHESIS - Q/K LR = {QK_LR_MULTIPLIER}x\")\n",
    "            print(\"=\"*70 + \"\\n\")\n",
    "            embedding_params = [model.token_embedding.weight]\n",
    "            qk = [p for n, p in model.named_parameters() if 'q_linear' in n or 'k_linear' in n]\n",
    "            other = [p for n, p in model.named_parameters() if 'q_linear' not in n and \n",
    "                     'k_linear' not in n and 'token_embedding' not in n]\n",
    "\n",
    "            print(f\"Q/K params: {sum(p.numel() for p in qk)/1e6:.1f}M\")\n",
    "            print(f\"Other params: {sum(p.numel() for p in other)/1e6:.1f}M\\n\")\n",
    "\n",
    "            opt = torch.optim.AdamW([\n",
    "                {'params':embedding_params,'lr': BASE_LR * 0.1, 'weight_decay': 0},\n",
    "                {'params': qk, 'lr': BASE_LR * QK_LR_MULTIPLIER, 'weight_decay': WEIGHT_DECAY},\n",
    "                {'params': other, 'lr': BASE_LR, 'weight_decay': WEIGHT_DECAY}\n",
    "            ])\n",
    "\n",
    "            sched = torch.optim.lr_scheduler.OneCycleLR(\n",
    "                opt, [BASE_LR * 0.1,BASE_LR * QK_LR_MULTIPLIER, BASE_LR],\n",
    "                total_steps=len(loader) * NUM_EPOCHS,\n",
    "                pct_start=WARMUP_STEPS / (len(loader) * NUM_EPOCHS)\n",
    "            )\n",
    "        else:\n",
    "            print(\"=\"*70)\n",
    "            print(\"BASELINE (Standard LR)\")\n",
    "            print(\"=\"*70 + \"\\n\")\n",
    "\n",
    "            # Use lower LR for pretrained embeddings if they exist\n",
    "            if pretrained_embeddings is not None:\n",
    "                embedding_params = [model.token_embedding.weight]\n",
    "                other_params = [p for n, p in model.named_parameters() if 'token_embedding' not in n]\n",
    "\n",
    "                opt = torch.optim.AdamW([\n",
    "                    {'params': embedding_params, 'lr': BASE_LR * 0.1, 'weight_decay': 0},  # Fine-tune slowly\n",
    "                    {'params': other_params, 'lr': BASE_LR, 'weight_decay': WEIGHT_DECAY}\n",
    "                ])\n",
    "\n",
    "                print(\"Using differential LR: embeddings=0.1x, other=1.0x\\n\")\n",
    "            else:\n",
    "                opt = torch.optim.AdamW(model.parameters(), BASE_LR, weight_decay=WEIGHT_DECAY)\n",
    "\n",
    "            sched = torch.optim.lr_scheduler.OneCycleLR(\n",
    "                opt,\n",
    "                max_lr=BASE_LR if pretrained_embeddings is None else [BASE_LR * 0.1, BASE_LR],\n",
    "                total_steps=len(loader) * NUM_EPOCHS,\n",
    "                pct_start=WARMUP_STEPS / (len(loader) * NUM_EPOCHS)\n",
    "            )\n",
    "\n",
    "        # Train\n",
    "        best_f1 = 0\n",
    "        results = {'loss': [], 'train_f1': [], 'val_f1': [], 'val_em': [], 'attn': []}\n",
    "\n",
    "        for e in range(NUM_EPOCHS):\n",
    "            print(f\"\\n{'='*70}\")\n",
    "            print(f\"EPOCH {e+1}/{NUM_EPOCHS}\")\n",
    "            print('='*70)\n",
    "\n",
    "            loss = train_epoch(model, loader, opt, sched, device, e+1)\n",
    "            results['loss'].append(loss)\n",
    "            print(f\"\\nLoss: {loss:.4f}\")\n",
    "\n",
    "            # Eval\n",
    "            train_m = evaluate(model, train_ds, tokenizer, device, 200)\n",
    "            val_m = evaluate(model, val_ds, tokenizer, device, 300)\n",
    "\n",
    "            results['train_f1'].append(train_m['f1'])\n",
    "            results['val_f1'].append(val_m['f1'])\n",
    "            results['val_em'].append(val_m['em'])\n",
    "\n",
    "            gap = train_m['f1'] - val_m['f1']\n",
    "            print(f\"Train F1: {train_m['f1']:.4f} | Val F1: {val_m['f1']:.4f} | Gap: {gap:.4f} | EM: {val_m['em']:.4f}\")\n",
    "\n",
    "            # Sample\n",
    "            if e % 2 == 0:\n",
    "                item = val_ds.dataset.data[val_ds.indices[0]]\n",
    "                pred = generate(model, tokenizer, item['context'], item['question'], device)\n",
    "                print(f\"\\nSample:\")\n",
    "                print(f\"  Q: {item['question'][:60]}...\")\n",
    "                print(f\"  True: {item['answer']}\")\n",
    "                print(f\"  Pred: {pred}\")\n",
    "                print(f\"  F1: {f1_score(pred, item['answer']):.3f}\")\n",
    "\n",
    "            # Attention\n",
    "            if e % 4 == 0 and TEST_QK_HYPOTHESIS:\n",
    "                attn = analyze_attention(model, val_ds, tokenizer, device)\n",
    "                results['attn'].append(attn)\n",
    "                print(f\"Attention: {attn:.4f}\")\n",
    "\n",
    "            # Save best\n",
    "            if val_m['f1'] > best_f1:\n",
    "                best_f1 = val_m['f1']\n",
    "                name = 'best_qk_'+str(seed_)+'.pt' if TEST_QK_HYPOTHESIS else 'best_baseline_'+str(seed_)+'.pt'\n",
    "                torch.save({'model': model.state_dict(), 'f1': best_f1, 'epoch': e+1}, name)\n",
    "                print(f\"✓ SAVED! Best F1: {best_f1:.4f}\")\n",
    "\n",
    "        # Final\n",
    "        print(f\"\\n{'='*70}\")\n",
    "        print(\"FINAL RESULTS\")\n",
    "        print('='*70)\n",
    "        print(f\"Best Val F1: {best_f1*100:.1f}%\")\n",
    "        print(f\"Final Val F1: {results['val_f1'][-1]*100:.1f}%\")\n",
    "        print(f\"Final EM: {results['val_em'][-1]*100:.1f}%\")\n",
    "        print(f\"Train-Val Gap: {results['train_f1'][-1] - results['val_f1'][-1]:.4f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "4197bcc1",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|███████████████████████████████| 40000/40000 [20:52<00:00, 31.94it/s]\n"
     ]
    }
   ],
   "source": [
    "# Eval\n",
    "train_m = evaluate(model, train_ds, tokenizer, device, 40000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "14dcc4d4",
   "metadata": {},
   "outputs": [],
   "source": [
    "train_m[\"f1\"],train_m[\"em\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "01114815",
   "metadata": {},
   "outputs": [],
   "source": [
    "val_m = evaluate(model, val_ds, tokenizer, device, 20000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c37c3907",
   "metadata": {},
   "outputs": [],
   "source": [
    "val_m[\"f1\"],val_m[\"em\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ae5121cc",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "c57fa4a8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading tokenizer...\n",
      "✓ GloVe embeddings found: glove.6B.300d.txt\n",
      "\n",
      "======================================================================\n",
      "LOADING GLOVE EMBEDDINGS\n",
      "======================================================================\n",
      "Reading GloVe file (this takes ~1 minute)...\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Loading GloVe: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 400000/400000 [00:32<00:00, 12352.13it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "✓ Loaded 400,000 GloVe vectors\n",
      "Matching tokenizer vocabulary with GloVe...\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Matching: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50257/50257 [00:00<00:00, 309838.63it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "✓ Matched 43,058/50,257 tokens (85.7%)\n",
      "======================================================================\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading datasets...\n",
      "Initializing baseline model instance...1234\n",
      "Initializing token embeddings with GloVe...\n",
      "✓ Token embeddings initialized with GloVe\n",
      "Baseline model loaded.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 40000/40000 [24:08<00:00, 27.62it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'f1': 0.7205811190274382, 'em': 0.561875}\n",
      "Initializing baseline model instance...1235\n",
      "Initializing token embeddings with GloVe...\n",
      "✓ Token embeddings initialized with GloVe\n",
      "Baseline model loaded.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 40000/40000 [26:57<00:00, 24.73it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'f1': 0.7246918273201091, 'em': 0.563}\n",
      "Initializing baseline model instance...1236\n",
      "Initializing token embeddings with GloVe...\n",
      "✓ Token embeddings initialized with GloVe\n",
      "Baseline model loaded.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 40000/40000 [23:25<00:00, 28.47it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'f1': 0.7313670387150584, 'em': 0.57665}\n",
      "Initializing baseline model instance...1237\n",
      "Initializing token embeddings with GloVe...\n",
      "✓ Token embeddings initialized with GloVe\n",
      "Baseline model loaded.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 40000/40000 [23:28<00:00, 28.39it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'f1': 0.718852340662438, 'em': 0.571575}\n",
      "Initializing baseline model instance...1238\n",
      "Initializing token embeddings with GloVe...\n",
      "✓ Token embeddings initialized with GloVe\n",
      "Baseline model loaded.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 40000/40000 [26:30<00:00, 25.14it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'f1': 0.733321006810185, 'em': 0.57715}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "# Add this if not present\n",
    "from transformers import GPT2TokenizerFast\n",
    "\n",
    "# loading and evaluating faster_qk model on train data for f1 score\n",
    "\n",
    "\n",
    "seeds_list = [1234, 1235,1236, 1237, 1238]\n",
    "\n",
    "\n",
    "\n",
    "print(\"Loading tokenizer...\")\n",
    "tokenizer = GPT2TokenizerFast.from_pretrained(\"gpt2\")\n",
    "tokenizer.pad_token = tokenizer.eos_token\n",
    "DROPOUT = 0.2\n",
    "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
    "\n",
    "glove_file = download_and_extract_glove()\n",
    "pretrained_embeddings = load_glove_embeddings(glove_file, tokenizer, D_MODEL)\n",
    "\n",
    "\n",
    "\n",
    "# Load datasets\n",
    "print(\"Loading datasets...\")\n",
    "full_train = SQuADDataset('train-v2.0.json', tokenizer, MAX_SEQ_LEN, MAX_ANSWER_LEN)\n",
    "\n",
    "\n",
    "train_ds = Subset(full_train, torch.randperm(len(full_train))[:40000])\n",
    "\n",
    "\n",
    "\n",
    "for nseed in seeds_list:\n",
    "    BASELINE_CKPT = \"\"\"/home/cs22d010/Squad_QA/Squad_Results/Rebuiding-From-Scratch/best_qk_\"\"\"+str(nseed)+\"\"\".pt\"\"\" #\"best_qk.pt\"  #\"best_baseline.pt\"  #  \"best_qk.pt\"\n",
    "    model_kwargs = dict(vocab_size=tokenizer.vocab_size,\n",
    "                        d_model=D_MODEL, n_heads=N_HEADS,\n",
    "                        n_layers=N_LAYERS, d_ff=D_FF,\n",
    "                        max_seq_len=MAX_SEQ_LEN, \n",
    "                        dropout=DROPOUT,pretrained_embeddings=pretrained_embeddings)\n",
    "    print(\"Initializing baseline model instance...\"+str(nseed))\n",
    "    baseline = GPTAnswerGenerator(**model_kwargs).to(device)\n",
    "    baseline_ckpt = torch.load(BASELINE_CKPT, map_location=device)\n",
    "    baseline.load_state_dict(baseline_ckpt[\"model\"])\n",
    "    print(\"Baseline model loaded.\")\n",
    "    \n",
    "    train_m = evaluate(baseline, train_ds, tokenizer, device,40000)\n",
    "\n",
    "    print(train_m)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "a35dcb2a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading tokenizer...\n",
      "✓ GloVe embeddings found: glove.6B.300d.txt\n",
      "\n",
      "======================================================================\n",
      "LOADING GLOVE EMBEDDINGS\n",
      "======================================================================\n",
      "Reading GloVe file (this takes ~1 minute)...\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Loading GloVe: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 400000/400000 [00:22<00:00, 17979.49it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "✓ Loaded 400,000 GloVe vectors\n",
      "Matching tokenizer vocabulary with GloVe...\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Matching: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50257/50257 [00:00<00:00, 330287.53it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "✓ Matched 43,058/50,257 tokens (85.7%)\n",
      "======================================================================\n",
      "\n",
      "Loading datasets...\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Initializing baseline model instance...1234\n",
      "Initializing token embeddings with GloVe...\n",
      "✓ Token embeddings initialized with GloVe\n",
      "Baseline model loaded.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 40000/40000 [25:56<00:00, 25.69it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'f1': 0.8296049393268525, 'em': 0.725625}\n",
      "Initializing baseline model instance...1235\n",
      "Initializing token embeddings with GloVe...\n",
      "✓ Token embeddings initialized with GloVe\n",
      "Baseline model loaded.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 40000/40000 [24:54<00:00, 26.76it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'f1': 0.803488474110378, 'em': 0.6802}\n",
      "Initializing baseline model instance...1236\n",
      "Initializing token embeddings with GloVe...\n",
      "✓ Token embeddings initialized with GloVe\n",
      "Baseline model loaded.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 40000/40000 [27:23<00:00, 24.34it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'f1': 0.736969098824084, 'em': 0.585725}\n",
      "Initializing baseline model instance...1237\n",
      "Initializing token embeddings with GloVe...\n",
      "✓ Token embeddings initialized with GloVe\n",
      "Baseline model loaded.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 40000/40000 [26:17<00:00, 25.35it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'f1': 0.8185366758991849, 'em': 0.702075}\n",
      "Initializing baseline model instance...1238\n",
      "Initializing token embeddings with GloVe...\n",
      "✓ Token embeddings initialized with GloVe\n",
      "Baseline model loaded.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 40000/40000 [22:56<00:00, 29.06it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'f1': 0.8426166410023589, 'em': 0.738275}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "\n",
    "# Evaluate every baseline checkpoint (one per training seed) on a fixed random\n",
    "# subset of the SQuAD v2.0 training set and report F1 / EM per seed.\n",
    "\n",
    "seeds_list = [1234, 1235, 1236, 1237, 1238]\n",
    "\n",
    "# Directory that holds the per-seed checkpoints (best_baseline_<seed>.pt).\n",
    "CKPT_DIR = \"/home/cs22d010/Squad_QA/Squad_Results/Rebuiding-From-Scratch\"\n",
    "N_EVAL_EXAMPLES = 40000  # number of examples drawn into the evaluation subset\n",
    "EVAL_SAMPLE_CAP = 40000  # 5th positional argument of evaluate(); presumably a cap on\n",
    "                         # the number of scored examples -- TODO confirm\n",
    "EVAL_SUBSET_SEED = 0     # fixes which examples are drawn into the subset\n",
    "\n",
    "print(\"Loading tokenizer...\")\n",
    "tokenizer = GPT2TokenizerFast.from_pretrained(\"gpt2\")\n",
    "tokenizer.pad_token = tokenizer.eos_token  # GPT-2 has no pad token; reuse EOS\n",
    "DROPOUT = 0.2\n",
    "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
    "\n",
    "glove_file = download_and_extract_glove()\n",
    "pretrained_embeddings = load_glove_embeddings(glove_file, tokenizer, D_MODEL)\n",
    "\n",
    "# Load datasets\n",
    "print(\"Loading datasets...\")\n",
    "full_train = SQuADDataset('train-v2.0.json', tokenizer, MAX_SEQ_LEN, MAX_ANSWER_LEN)\n",
    "\n",
    "# Draw the subset with a dedicated, seeded generator. An unseeded randperm picks\n",
    "# different examples on every run, so scores from separate runs or cells would be\n",
    "# measured on different data and could not be compared.\n",
    "subset_rng = torch.Generator().manual_seed(EVAL_SUBSET_SEED)\n",
    "subset_indices = torch.randperm(len(full_train), generator=subset_rng)[:N_EVAL_EXAMPLES]\n",
    "train_ds = Subset(full_train, subset_indices)\n",
    "\n",
    "# The architecture is the same for every seed, so build the kwargs once.\n",
    "model_kwargs = dict(vocab_size=tokenizer.vocab_size,\n",
    "                    d_model=D_MODEL, n_heads=N_HEADS,\n",
    "                    n_layers=N_LAYERS, d_ff=D_FF,\n",
    "                    max_seq_len=MAX_SEQ_LEN,\n",
    "                    dropout=DROPOUT, pretrained_embeddings=pretrained_embeddings)\n",
    "\n",
    "metrics_by_seed = {}  # training seed -> metrics dict returned by evaluate()\n",
    "for nseed in seeds_list:\n",
    "    BASELINE_CKPT = os.path.join(CKPT_DIR, f\"best_baseline_{nseed}.pt\")\n",
    "    print(\"Initializing baseline model instance...\"+str(nseed))\n",
    "    baseline = GPTAnswerGenerator(**model_kwargs).to(device)\n",
    "    baseline_ckpt = torch.load(BASELINE_CKPT, map_location=device)\n",
    "    # The checkpoint stores the weights under the \"model\" key.\n",
    "    baseline.load_state_dict(baseline_ckpt[\"model\"])\n",
    "    print(\"Baseline model loaded.\")\n",
    "\n",
    "    train_m = evaluate(baseline, train_ds, tokenizer, device, EVAL_SAMPLE_CAP)\n",
    "    metrics_by_seed[nseed] = train_m\n",
    "\n",
    "    print(train_m)\n",
    "\n",
    "# Aggregate across seeds so the run-to-run spread does not have to be read off the logs.\n",
    "mean_f1 = sum(m[\"f1\"] for m in metrics_by_seed.values()) / len(metrics_by_seed)\n",
    "mean_em = sum(m[\"em\"] for m in metrics_by_seed.values()) / len(metrics_by_seed)\n",
    "print(f\"Mean over {len(metrics_by_seed)} seeds: F1={mean_f1:.4f}, EM={mean_em:.4f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b633d97a",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "8af4bc17",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading tokenizer...\n",
      "✓ GloVe embeddings found: glove.6B.300d.txt\n",
      "\n",
      "======================================================================\n",
      "LOADING GLOVE EMBEDDINGS\n",
      "======================================================================\n",
      "Reading GloVe file (this takes ~1 minute)...\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Loading GloVe: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 400000/400000 [00:21<00:00, 18850.05it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "✓ Loaded 400,000 GloVe vectors\n",
      "Matching tokenizer vocabulary with GloVe...\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Matching: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50257/50257 [00:00<00:00, 380099.89it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "✓ Matched 43,058/50,257 tokens (85.7%)\n",
      "======================================================================\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading datasets...\n",
      "Initializing baseline model instance...1234\n",
      "Initializing token embeddings with GloVe...\n",
      "✓ Token embeddings initialized with GloVe\n",
      "Baseline model loaded.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5000/5000 [03:04<00:00, 27.15it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'f1': 0.576054313749381, 'em': 0.3712}\n",
      "Initializing baseline model instance...1235\n",
      "Initializing token embeddings with GloVe...\n",
      "✓ Token embeddings initialized with GloVe\n",
      "Baseline model loaded.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5000/5000 [03:30<00:00, 23.74it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'f1': 0.587751463582432, 'em': 0.3774}\n",
      "Initializing baseline model instance...1236\n",
      "Initializing token embeddings with GloVe...\n",
      "✓ Token embeddings initialized with GloVe\n",
      "Baseline model loaded.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5000/5000 [02:50<00:00, 29.35it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'f1': 0.6024746737280913, 'em': 0.4022}\n",
      "Initializing baseline model instance...1237\n",
      "Initializing token embeddings with GloVe...\n",
      "✓ Token embeddings initialized with GloVe\n",
      "Baseline model loaded.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5000/5000 [02:44<00:00, 30.34it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'f1': 0.5653474290713689, 'em': 0.3666}\n",
      "Initializing baseline model instance...1238\n",
      "Initializing token embeddings with GloVe...\n",
      "✓ Token embeddings initialized with GloVe\n",
      "Baseline model loaded.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5000/5000 [03:14<00:00, 25.76it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'f1': 0.6014094317562373, 'em': 0.3922}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "# GPT2TokenizerFast is imported here so this cell also runs on a kernel where no\n",
    "# earlier cell has imported it.\n",
    "from transformers import GPT2TokenizerFast\n",
    "\n",
    "# Evaluate every faster-QK checkpoint (one per training seed) on a fixed random\n",
    "# subset of the SQuAD v2.0 *dev* set and report F1 / EM per seed.\n",
    "# NOTE: full_train / train_ds / train_m / baseline / BASELINE_CKPT keep the names\n",
    "# used by the other evaluation cells; in this cell they hold dev data and\n",
    "# faster-QK checkpoints.\n",
    "\n",
    "seeds_list = [1234, 1235, 1236, 1237, 1238]\n",
    "\n",
    "# Directory that holds the per-seed checkpoints (best_qk_<seed>.pt).\n",
    "CKPT_DIR = \"/home/cs22d010/Squad_QA/Squad_Results/Rebuiding-From-Scratch\"\n",
    "N_EVAL_EXAMPLES = 5000   # number of examples drawn into the evaluation subset\n",
    "EVAL_SAMPLE_CAP = 40000  # 5th positional argument of evaluate(); presumably a cap on\n",
    "                         # the number of scored examples -- TODO confirm\n",
    "EVAL_SUBSET_SEED = 0     # fixes which examples are drawn into the subset\n",
    "\n",
    "print(\"Loading tokenizer...\")\n",
    "tokenizer = GPT2TokenizerFast.from_pretrained(\"gpt2\")\n",
    "tokenizer.pad_token = tokenizer.eos_token  # GPT-2 has no pad token; reuse EOS\n",
    "DROPOUT = 0.2\n",
    "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
    "\n",
    "glove_file = download_and_extract_glove()\n",
    "pretrained_embeddings = load_glove_embeddings(glove_file, tokenizer, D_MODEL)\n",
    "\n",
    "# Load datasets\n",
    "print(\"Loading datasets...\")\n",
    "full_train = SQuADDataset('dev-v2.0.json', tokenizer, MAX_SEQ_LEN, MAX_ANSWER_LEN)\n",
    "\n",
    "# Draw the subset with a dedicated, seeded generator. An unseeded randperm picks\n",
    "# different examples on every run, so the faster-QK and baseline checkpoints would\n",
    "# be scored on different dev examples and their F1 / EM could not be compared.\n",
    "subset_rng = torch.Generator().manual_seed(EVAL_SUBSET_SEED)\n",
    "subset_indices = torch.randperm(len(full_train), generator=subset_rng)[:N_EVAL_EXAMPLES]\n",
    "train_ds = Subset(full_train, subset_indices)\n",
    "\n",
    "# The architecture is the same for every seed, so build the kwargs once.\n",
    "model_kwargs = dict(vocab_size=tokenizer.vocab_size,\n",
    "                    d_model=D_MODEL, n_heads=N_HEADS,\n",
    "                    n_layers=N_LAYERS, d_ff=D_FF,\n",
    "                    max_seq_len=MAX_SEQ_LEN,\n",
    "                    dropout=DROPOUT, pretrained_embeddings=pretrained_embeddings)\n",
    "\n",
    "metrics_by_seed = {}  # training seed -> metrics dict returned by evaluate()\n",
    "for nseed in seeds_list:\n",
    "    BASELINE_CKPT = os.path.join(CKPT_DIR, f\"best_qk_{nseed}.pt\")\n",
    "    # Log messages name the faster-QK model so this output cannot be mistaken\n",
    "    # for the baseline evaluation.\n",
    "    print(\"Initializing faster_qk model instance...\"+str(nseed))\n",
    "    baseline = GPTAnswerGenerator(**model_kwargs).to(device)\n",
    "    baseline_ckpt = torch.load(BASELINE_CKPT, map_location=device)\n",
    "    # The checkpoint stores the weights under the \"model\" key.\n",
    "    baseline.load_state_dict(baseline_ckpt[\"model\"])\n",
    "    print(\"faster_qk model loaded.\")\n",
    "\n",
    "    train_m = evaluate(baseline, train_ds, tokenizer, device, EVAL_SAMPLE_CAP)\n",
    "    metrics_by_seed[nseed] = train_m\n",
    "\n",
    "    print(train_m)\n",
    "\n",
    "# Aggregate across seeds so the run-to-run spread does not have to be read off the logs.\n",
    "mean_f1 = sum(m[\"f1\"] for m in metrics_by_seed.values()) / len(metrics_by_seed)\n",
    "mean_em = sum(m[\"em\"] for m in metrics_by_seed.values()) / len(metrics_by_seed)\n",
    "print(f\"Mean over {len(metrics_by_seed)} seeds: F1={mean_f1:.4f}, EM={mean_em:.4f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e01198d1",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "abbf40af",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading tokenizer...\n",
      "✓ GloVe embeddings found: glove.6B.300d.txt\n",
      "\n",
      "======================================================================\n",
      "LOADING GLOVE EMBEDDINGS\n",
      "======================================================================\n",
      "Reading GloVe file (this takes ~1 minute)...\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Loading GloVe: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 400000/400000 [00:21<00:00, 18715.21it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "✓ Loaded 400,000 GloVe vectors\n",
      "Matching tokenizer vocabulary with GloVe...\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Matching: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50257/50257 [00:00<00:00, 339416.85it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "✓ Matched 43,058/50,257 tokens (85.7%)\n",
      "======================================================================\n",
      "\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading datasets...\n",
      "Initializing baseline model instance...1234\n",
      "Initializing token embeddings with GloVe...\n",
      "✓ Token embeddings initialized with GloVe\n",
      "Baseline model loaded.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5000/5000 [03:08<00:00, 26.54it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'f1': 0.5681394480854622, 'em': 0.3682}\n",
      "Initializing baseline model instance...1235\n",
      "Initializing token embeddings with GloVe...\n",
      "✓ Token embeddings initialized with GloVe\n",
      "Baseline model loaded.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5000/5000 [03:11<00:00, 26.15it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'f1': 0.5702109240811355, 'em': 0.369}\n",
      "Initializing baseline model instance...1236\n",
      "Initializing token embeddings with GloVe...\n",
      "✓ Token embeddings initialized with GloVe\n",
      "Baseline model loaded.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5000/5000 [03:29<00:00, 23.92it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'f1': 0.553259539025865, 'em': 0.3524}\n",
      "Initializing baseline model instance...1237\n",
      "Initializing token embeddings with GloVe...\n",
      "✓ Token embeddings initialized with GloVe\n",
      "Baseline model loaded.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5000/5000 [03:25<00:00, 24.37it/s]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'f1': 0.571212736113227, 'em': 0.367}\n",
      "Initializing baseline model instance...1238\n",
      "Initializing token embeddings with GloVe...\n",
      "✓ Token embeddings initialized with GloVe\n",
      "Baseline model loaded.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Eval: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5000/5000 [02:55<00:00, 28.44it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'f1': 0.5921552339345275, 'em': 0.3914}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "\n",
    "# Evaluate every baseline checkpoint (one per training seed) on a fixed random\n",
    "# subset of the SQuAD v2.0 *dev* set and report F1 / EM per seed.\n",
    "# NOTE: full_train / train_ds / train_m keep the names used by the other\n",
    "# evaluation cells; in this cell they hold dev data.\n",
    "\n",
    "seeds_list = [1234, 1235, 1236, 1237, 1238]\n",
    "\n",
    "# Directory that holds the per-seed checkpoints (best_baseline_<seed>.pt).\n",
    "CKPT_DIR = \"/home/cs22d010/Squad_QA/Squad_Results/Rebuiding-From-Scratch\"\n",
    "N_EVAL_EXAMPLES = 5000   # number of examples drawn into the evaluation subset\n",
    "EVAL_SAMPLE_CAP = 40000  # 5th positional argument of evaluate(); presumably a cap on\n",
    "                         # the number of scored examples -- TODO confirm\n",
    "EVAL_SUBSET_SEED = 0     # fixes which examples are drawn into the subset\n",
    "\n",
    "print(\"Loading tokenizer...\")\n",
    "tokenizer = GPT2TokenizerFast.from_pretrained(\"gpt2\")\n",
    "tokenizer.pad_token = tokenizer.eos_token  # GPT-2 has no pad token; reuse EOS\n",
    "DROPOUT = 0.2\n",
    "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
    "\n",
    "glove_file = download_and_extract_glove()\n",
    "pretrained_embeddings = load_glove_embeddings(glove_file, tokenizer, D_MODEL)\n",
    "\n",
    "# Load datasets\n",
    "print(\"Loading datasets...\")\n",
    "full_train = SQuADDataset('dev-v2.0.json', tokenizer, MAX_SEQ_LEN, MAX_ANSWER_LEN)\n",
    "\n",
    "# Draw the subset with a dedicated, seeded generator. An unseeded randperm picks\n",
    "# different examples on every run, so the baseline and faster-QK checkpoints would\n",
    "# be scored on different dev examples and their F1 / EM could not be compared.\n",
    "subset_rng = torch.Generator().manual_seed(EVAL_SUBSET_SEED)\n",
    "subset_indices = torch.randperm(len(full_train), generator=subset_rng)[:N_EVAL_EXAMPLES]\n",
    "train_ds = Subset(full_train, subset_indices)\n",
    "\n",
    "# The architecture is the same for every seed, so build the kwargs once.\n",
    "model_kwargs = dict(vocab_size=tokenizer.vocab_size,\n",
    "                    d_model=D_MODEL, n_heads=N_HEADS,\n",
    "                    n_layers=N_LAYERS, d_ff=D_FF,\n",
    "                    max_seq_len=MAX_SEQ_LEN,\n",
    "                    dropout=DROPOUT, pretrained_embeddings=pretrained_embeddings)\n",
    "\n",
    "metrics_by_seed = {}  # training seed -> metrics dict returned by evaluate()\n",
    "for nseed in seeds_list:\n",
    "    BASELINE_CKPT = os.path.join(CKPT_DIR, f\"best_baseline_{nseed}.pt\")\n",
    "    print(\"Initializing baseline model instance...\"+str(nseed))\n",
    "    baseline = GPTAnswerGenerator(**model_kwargs).to(device)\n",
    "    baseline_ckpt = torch.load(BASELINE_CKPT, map_location=device)\n",
    "    # The checkpoint stores the weights under the \"model\" key.\n",
    "    baseline.load_state_dict(baseline_ckpt[\"model\"])\n",
    "    print(\"Baseline model loaded.\")\n",
    "\n",
    "    train_m = evaluate(baseline, train_ds, tokenizer, device, EVAL_SAMPLE_CAP)\n",
    "    metrics_by_seed[nseed] = train_m\n",
    "\n",
    "    print(train_m)\n",
    "\n",
    "# Aggregate across seeds so the run-to-run spread does not have to be read off the logs.\n",
    "mean_f1 = sum(m[\"f1\"] for m in metrics_by_seed.values()) / len(metrics_by_seed)\n",
    "mean_em = sum(m[\"em\"] for m in metrics_by_seed.values()) / len(metrics_by_seed)\n",
    "print(f\"Mean over {len(metrics_by_seed)} seeds: F1={mean_f1:.4f}, EM={mean_em:.4f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c4c7091b",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
