{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "275e52ac",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/matteo/anaconda3/envs/LanguageModelDecoder/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1.13.0+cu117\n"
     ]
    }
   ],
   "source": [
    "import torch\n",
    "import os\n",
    "print(torch.__version__)\n",
    "import os\n",
    "\n",
    "import pickle\n",
    "import sys\n",
    "sys.path.append('../')\n",
    "import  utils.lmDecoderUtils as lmDecoderUtils\n",
    "import numpy as np\n",
    "import os \n",
    "import time\n",
    "import tqdm\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "d69284ed",
   "metadata": {},
   "outputs": [],
   "source": [
    "# use LanguageModelDecoder env"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4c3726b3",
   "metadata": {},
   "source": [
    "## Load the logits"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "e826c73f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Alternative result folders (swap in as needed):\n",
    "# results_dir = \"../results/mfcc_sm_gru_ctc_LONGRUN/\"\n",
    "# results_dir =\"../results/gru_ctc_diphones_dualhead\"\n",
    "# results_dir = \"../competition_evaluation/results/mfcc_sm_gru_ctc_LONGRUN_competition/\"\n",
    "\n",
    "# Ground-truth labels are only loaded when COMPETITION_DATA is False.\n",
    "COMPETITION_DATA = False\n",
    "results_dir = \"../results/ensemble_mfcc_sm_gru_ctc/\"\n",
    "\n",
    "# NOTE: unpickling can execute arbitrary code; only load files produced by a trusted run.\n",
    "# The context manager closes the file handle that a bare open() call would leave open.\n",
    "with open(os.path.join(results_dir, \"pred_logits.pkl\"), \"rb\") as logits_file:\n",
    "    pred_logits = pickle.load(logits_file)\n",
    "\n",
    "if not COMPETITION_DATA:\n",
    "    # load the ground truth labels\n",
    "    df = pd.read_csv(os.path.join(results_dir, \"results.csv\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "535ed867",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "WARNING: Logging before InitGoogleLogging() is written to STDERR\n",
      "I0613 13:41:48.258917 2875766 brain_speech_decoder.h:52] Reading fst /data/speech_5gram/lang_test/TLG.fst\n",
      "I0613 13:45:57.212535 2875766 brain_speech_decoder.h:58] Reading lm fst /data/speech_5gram/lang_test/G.fst\n",
      "I0613 13:46:47.809588 2875766 brain_speech_decoder.h:70] Reading rescore fst /data/speech_5gram/lang_test/G_no_prune.fst\n",
      "I0613 14:00:59.630393 2875766 brain_speech_decoder.h:81] Reading symbol table /data/speech_5gram/lang_test/words.txt\n"
     ]
    }
   ],
   "source": [
    "\n",
    "\n",
    "# LM decoding hyperparameters, defined first so the decoder below is built\n",
    "# with the same acoustic scale that the LLM rescoring step uses later.\n",
    "acoustic_scale = 0.5\n",
    "blank_penalty = np.log(7)\n",
    "llm_weight = 0.5\n",
    "\n",
    "# # Load OPT 6B model\n",
    "# llm, llm_tokenizer = lmDecoderUtils.build_opt(\n",
    "#     cacheDir=\"/data/\", device=\"auto\", load_in_8bit=True\n",
    "# )\n",
    "\n",
    "# Building the decoder reads several FSTs from disk; in the recorded run the log\n",
    "# timestamps span roughly 20 minutes.\n",
    "ngramDecoder = lmDecoderUtils.build_lm_decoder(\n",
    "    \"/data/speech_5gram/lang_test\", acoustic_scale=acoustic_scale, nbest=100, beam=18\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "baeb9ec8",
   "metadata": {},
   "outputs": [],
   "source": [
    "llm, llm_tokenizer = lmDecoderUtils.build_gpt2_torch()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "aacc96a2",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Replace every torch.Tensor entry of pred_logits with a NumPy array (in place).\n",
    "# detach() comes first because Tensor.numpy() raises on tensors that require grad.\n",
    "for i, entry in enumerate(pred_logits):\n",
    "    if isinstance(entry, torch.Tensor):\n",
    "        pred_logits[i] = entry.detach().cpu().numpy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "2ad4ab56",
   "metadata": {},
   "outputs": [],
   "source": [
    "if not COMPETITION_DATA:\n",
    "    sentences = df[\"True Sentence\"].tolist()    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "41b5f3b0",
   "metadata": {},
   "outputs": [],
   "source": [
    "logits_unfolded = [item for sublist in pred_logits for item in sublist]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "cb81ef3c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([-2.9265929e-03, -1.3528991e+01, -1.1602575e+01, -1.1357799e+01,\n",
       "       -1.2818812e+01, -1.2259154e+01, -1.2478169e+01, -1.0868212e+01,\n",
       "       -9.8153744e+00, -7.8828115e+00, -9.2109613e+00, -1.1855935e+01,\n",
       "       -1.0936158e+01, -1.2295590e+01, -1.1268902e+01, -9.2705212e+00,\n",
       "       -9.5302753e+00, -1.0859264e+01, -1.0064696e+01, -9.6504145e+00,\n",
       "       -1.0031100e+01, -1.0290339e+01, -1.2285597e+01, -1.0801009e+01,\n",
       "       -1.2156108e+01, -1.2798909e+01, -1.3683257e+01, -1.2073539e+01,\n",
       "       -1.0700659e+01, -7.5869784e+00, -9.9788504e+00, -8.4567471e+00,\n",
       "       -1.0264856e+01, -1.4081558e+01, -1.2501971e+01, -1.2350950e+01,\n",
       "       -1.1917652e+01, -9.2900295e+00, -8.7677574e+00, -1.0927437e+01,\n",
       "       -7.1558056e+00], dtype=float32)"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "logits_unfolded[0][0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "e0de35a7",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 880/880 [28:15<00:00,  1.93s/it]  \n"
     ]
    }
   ],
   "source": [
    "llm_outputs = []\n",
    "start_t = time.time()\n",
    "\n",
    "# Run the 5-gram LM decoder on every entry and collect its n-best output.\n",
    "nbest_outputs = []\n",
    "for j in tqdm.trange(len(logits_unfolded)):\n",
    "    # Rotate the last axis left by one so the first column (blank) ends up last.\n",
    "    blank_last = np.roll(logits_unfolded[j], -1, axis=-1)\n",
    "    # The helper is called with a leading singleton axis; index 0 strips it again.\n",
    "    rearranged = lmDecoderUtils.rearrange_speech_logits(blank_last[None, :, :], has_sil=True)\n",
    "    nbest_outputs.append(\n",
    "        lmDecoderUtils.lm_decode(\n",
    "            ngramDecoder,\n",
    "            rearranged[0],\n",
    "            blankPenalty=blank_penalty,\n",
    "            returnNBest=True,\n",
    "            rescore=True,\n",
    "        )\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9490ba4d",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 50%|█████     | 441/880 [2:11:12<1:54:17, 15.62s/it]"
     ]
    }
   ],
   "source": [
    "# Rescore each n-best list with the LLM; collect the decoded sentence and its confidence.\n",
    "decoded_sentences, confidences = [], []\n",
    "for i in tqdm.trange(len(nbest_outputs)):\n",
    "    sentence_i, confidence_i = lmDecoderUtils.gpt2_lm_decode(\n",
    "        llm,\n",
    "        llm_tokenizer,\n",
    "        nbest_outputs[i],\n",
    "        acoustic_scale,\n",
    "        0,  # fifth positional argument; presumably a length penalty -- TODO confirm in lmDecoderUtils\n",
    "        alpha=llm_weight,\n",
    "        returnConfidence=True,\n",
    "    )\n",
    "    decoded_sentences.append(sentence_i)\n",
    "    confidences.append(confidence_i)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "28f3ab8c",
   "metadata": {},
   "outputs": [
    {
     "ename": "SyntaxError",
     "evalue": "'break' outside loop (1107370803.py, line 6)",
     "output_type": "error",
     "traceback": [
      "\u001b[0;36m  Cell \u001b[0;32mIn[12], line 6\u001b[0;36m\u001b[0m\n\u001b[0;31m    break\u001b[0m\n\u001b[0m    ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m 'break' outside loop\n"
     ]
    }
   ],
   "source": [
    "if COMPETITION_DATA:\n",
    "    # Competition mode: only the decoded sentences are written out.\n",
    "    res_df = pd.DataFrame({\"pred_sentence\": decoded_sentences})\n",
    "    res_df.to_csv(os.path.join(results_dir, \"decoded_competition_wfst.csv\"), index=False)\n",
    "    print(\"fine\")\n",
    "    # No `break` here: outside a loop it is a SyntaxError that prevents this cell from\n",
    "    # running at all. The evaluation cells that use `sentences` need ground truth, and\n",
    "    # `sentences` is only defined when COMPETITION_DATA is False.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5c8d8b00",
   "metadata": {},
   "outputs": [],
   "source": [
    "# res_df"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4dfb6744",
   "metadata": {},
   "source": [
    "## Evaluation against the ground-truth sentences"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c15515a9",
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
    "\n",
    "def clean_prediction(pred_sentence, remove_word_repeats=True, min_char_repeat=4):\n",
    "    \"\"\"\n",
    "    Cleans a predicted sentence by removing repeated characters/words and trailing garbage.\n",
    "\n",
    "    Args:\n",
    "        pred_sentence (str): Raw predicted sentence.\n",
    "        remove_word_repeats (bool): If True, removes repeated word sequences like 'well well well'.\n",
    "        min_char_repeat (int): Threshold above which repeated characters (e.g., 'd d d d') are removed.\n",
    "\n",
    "    Returns:\n",
    "        str: Cleaned sentence.\n",
    "    \"\"\"\n",
    "    s = pred_sentence.lower()\n",
    "\n",
    "    # Remove character-level repetition like \"d d d d d\"\n",
    "    s = re.sub(rf'\\b(\\w)(?:\\s\\1){{{min_char_repeat - 1},}}\\b', r'\\1', s)\n",
    "\n",
    "    # Optionally remove repeated words like \"well well well\"\n",
    "    if remove_word_repeats:\n",
    "        s = re.sub(r'\\b(\\w+)(?:\\s+\\1){2,}\\b', r'\\1', s)\n",
    "\n",
    "    # Collapse multiple spaces\n",
    "    s = re.sub(r'\\s{2,}', ' ', s)\n",
    "\n",
    "    # Strip leading/trailing punctuation and whitespace\n",
    "    s = s.strip(\" ,.;!?\\\"'-\\n\\t\")\n",
    "\n",
    "    # Remove final \"space + single character\" if it looks like garbage (e.g., \"everything d\")\n",
    "    s = re.sub(r'\\s+\\w$', '', s)\n",
    "\n",
    "    return s\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bfafcf69",
   "metadata": {},
   "outputs": [],
   "source": [
    "import string\n",
    "def preprocess_text(text):\n",
    "    \"\"\"\n",
    "    Remove punctuation, strip, and convert text to lowercase.\n",
    "    \"\"\"\n",
    "    return text.translate(str.maketrans('', '', string.punctuation)).strip().lower()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "31661d86",
   "metadata": {},
   "outputs": [],
   "source": [
    "import jiwer  # For WER\n",
    "import sacrebleu  # For BLEU\n",
    "from rouge_score import rouge_scorer  # For ROUGE\n",
    "from nltk.translate.meteor_score import meteor_score  # For METEOR\n",
    "import bert_score  # For BERTScore\n",
    "import numpy as np\n",
    "\n",
    "def compute_metrics(text_transcriptions, gpt_decoded):\n",
    "    \"\"\"\n",
    "    Compute corpus-level and per-sentence text-generation metrics.\n",
    "\n",
    "    Both inputs are normalised with ``preprocess_text`` before scoring.\n",
    "\n",
    "    Args:\n",
    "        text_transcriptions (list): Ground-truth reference sentences.\n",
    "        gpt_decoded (list): Model-generated sentences, aligned with the references.\n",
    "\n",
    "    Returns:\n",
    "        dict: Aggregate values under \"WER\", \"BLEU\", \"ROUGE-1\", \"ROUGE-2\", \"ROUGE-L\",\n",
    "        \"METEOR\", \"BERTScore_Precision\", \"BERTScore_Recall\" and \"BERTScore_F1\", plus\n",
    "        per-sentence lists under \"METEOR_scores\", \"ROUGE_scores\", \"WER_scores\" and\n",
    "        \"BERTScore_F1_scores\".\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If the two inputs do not contain the same number of sentences.\n",
    "    \"\"\"\n",
    "    # Remove punctuation, strip and lowercase both sides.\n",
    "    text_transcriptions = [preprocess_text(text) for text in text_transcriptions]\n",
    "    gpt_decoded = [preprocess_text(text) for text in gpt_decoded]\n",
    "\n",
    "    # zip() below would silently truncate to the shorter list, so fail loudly instead.\n",
    "    if len(text_transcriptions) != len(gpt_decoded):\n",
    "        raise ValueError(\n",
    "            f\"Got {len(text_transcriptions)} references but {len(gpt_decoded)} predictions\"\n",
    "        )\n",
    "\n",
    "    results = {}\n",
    "\n",
    "    # WER (Word Error Rate)\n",
    "    results[\"WER\"] = jiwer.wer(text_transcriptions, gpt_decoded)\n",
    "\n",
    "    # BLEU Score\n",
    "    results[\"BLEU\"] = sacrebleu.corpus_bleu(gpt_decoded, [text_transcriptions]).score\n",
    "\n",
    "    # ROUGE Scores (mean F-measure over sentences)\n",
    "    rouge = rouge_scorer.RougeScorer([\"rouge1\", \"rouge2\", \"rougeL\"], use_stemmer=True)\n",
    "    rouge_scores = [rouge.score(ref, pred) for ref, pred in zip(text_transcriptions, gpt_decoded)]\n",
    "    results[\"ROUGE-1\"] = np.mean([score[\"rouge1\"].fmeasure for score in rouge_scores])\n",
    "    results[\"ROUGE-2\"] = np.mean([score[\"rouge2\"].fmeasure for score in rouge_scores])\n",
    "    results[\"ROUGE-L\"] = np.mean([score[\"rougeL\"].fmeasure for score in rouge_scores])\n",
    "\n",
    "    # METEOR works on whitespace-tokenised sentences.\n",
    "    tokenized_references = [ref.split() for ref in text_transcriptions]\n",
    "    tokenized_hypotheses = [pred.split() for pred in gpt_decoded]\n",
    "    meteor_scores = [meteor_score([ref], pred) for ref, pred in zip(tokenized_references, tokenized_hypotheses)]\n",
    "    results[\"METEOR\"] = np.mean(meteor_scores)\n",
    "\n",
    "    # BERTScore (semantic similarity). bert_score warns about empty sentences, and the\n",
    "    # recorded run of this notebook ended with NaN aggregates, so only pairs in which\n",
    "    # both sides are non-empty are scored; every other pair is assigned 0.0.\n",
    "    n_pairs = len(text_transcriptions)\n",
    "    bert_precision = np.zeros(n_pairs)\n",
    "    bert_recall = np.zeros(n_pairs)\n",
    "    bert_f1 = np.zeros(n_pairs)\n",
    "    scorable = [\n",
    "        idx for idx, (ref, pred) in enumerate(zip(text_transcriptions, gpt_decoded)) if ref and pred\n",
    "    ]\n",
    "    if scorable:\n",
    "        P, R, F1 = bert_score.score(\n",
    "            [gpt_decoded[idx] for idx in scorable],\n",
    "            [text_transcriptions[idx] for idx in scorable],\n",
    "            lang=\"en\",\n",
    "            rescale_with_baseline=True,\n",
    "        )\n",
    "        bert_precision[scorable] = P.cpu().numpy()\n",
    "        bert_recall[scorable] = R.cpu().numpy()\n",
    "        bert_f1[scorable] = F1.cpu().numpy()\n",
    "    results[\"BERTScore_Precision\"] = float(bert_precision.mean())\n",
    "    results[\"BERTScore_Recall\"] = float(bert_recall.mean())\n",
    "    results[\"BERTScore_F1\"] = float(bert_f1.mean())\n",
    "\n",
    "    # Keep the per-sentence values too so they need not be recomputed later.\n",
    "    results[\"METEOR_scores\"] = meteor_scores\n",
    "    results[\"ROUGE_scores\"] = rouge_scores\n",
    "    results[\"WER_scores\"] = [jiwer.wer([ref], [pred]) for ref, pred in zip(text_transcriptions, gpt_decoded)]\n",
    "    results[\"BERTScore_F1_scores\"] = bert_f1.tolist()\n",
    "    return results\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "425f42f1",
   "metadata": {},
   "outputs": [],
   "source": [
    "CLEAN = False\n",
    "# When enabled, every decoded sentence is passed through clean_prediction.\n",
    "if CLEAN:\n",
    "    decoded_sentences = list(map(clean_prediction, decoded_sentences))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d52e2e0d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# clean_decoded_sentences"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dc227d0d",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']\n",
      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
      "Warning: Empty candidate sentence detected; setting raw BERTscores to 0.\n",
      "Warning: Empty reference sentence detected; setting raw BERTScores to 0.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WER: 0.2745\n",
      "BLEU: 62.5477\n",
      "ROUGE-1: 0.7858\n",
      "ROUGE-2: 0.6911\n",
      "ROUGE-L: 0.7852\n",
      "METEOR: 0.7881\n",
      "BERTScore_Precision: nan\n",
      "BERTScore_Recall: nan\n",
      "BERTScore_F1: nan\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Warning: Empty candidate sentence detected; setting raw BERTscores to 0.\n",
      "Warning: Empty candidate sentence detected; setting raw BERTscores to 0.\n"
     ]
    }
   ],
   "source": [
    "\n",
    "metrics = compute_metrics(sentences, decoded_sentences)\n",
    "# Print only the aggregate values; keys containing \"scores\" hold per-sentence data.\n",
    "for metric, score in metrics.items():\n",
    "    if \"scores\" in metric:\n",
    "        continue\n",
    "    print(f\"{metric}: {score:.4f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f7869730",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# Per-sentence table: targets, predictions and the per-sentence metric values.\n",
    "results_df = pd.DataFrame({\n",
    "    \"target_sentence\": sentences,\n",
    "    \"pred_sentence\": decoded_sentences,\n",
    "})\n",
    "\n",
    "#unfold cer_list\n",
    "# cer_list_unfold = [item for sublist in cer_list for item in sublist]\n",
    "\n",
    "results_df[\"WER_scores\"] = metrics[\"WER_scores\"]\n",
    "results_df[\"METEOR_scores\"] = metrics[\"METEOR_scores\"]\n",
    "results_df[\"ROUGE_scores\"] = metrics[\"ROUGE_scores\"]\n",
    "results_df[\"BERTScore_F1_scores\"] = metrics[\"BERTScore_F1_scores\"]\n",
    "\n",
    "# os.path.join matches how results_dir is used when loading, and avoids the double\n",
    "# slash an f-string produces when results_dir already ends with '/'.\n",
    "results_df.to_csv(os.path.join(results_dir, \"language_results.csv\"), index=False)\n",
    "\n",
    "# Aggregate metrics only (keys containing \"scores\" are per-sentence), as a one-row table.\n",
    "overall_metrics = {k: v for k, v in metrics.items() if \"scores\" not in k}\n",
    "\n",
    "metrics_df = pd.DataFrame(overall_metrics, index=[0])\n",
    "metrics_df.to_csv(os.path.join(results_dir, \"language_metrics.csv\"), index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "79c99680",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>target_sentence</th>\n",
       "      <th>pred_sentence</th>\n",
       "      <th>WER_scores</th>\n",
       "      <th>METEOR_scores</th>\n",
       "      <th>ROUGE_scores</th>\n",
       "      <th>BERTScore_F1_scores</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>291</th>\n",
       "      <td>Developing everything.</td>\n",
       "      <td>well and everything d d d d d d d d d d</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>0.161290</td>\n",
       "      <td>{'rouge1': (0.07692307692307693, 0.5, 0.133333...</td>\n",
       "      <td>3.762290e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>259</th>\n",
       "      <td>Social arbitrage.</td>\n",
       "      <td>social fabric d d d d d d d d d</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>0.172414</td>\n",
       "      <td>{'rouge1': (0.09090909090909091, 0.5, 0.153846...</td>\n",
       "      <td>1.000000e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>287</th>\n",
       "      <td>Summary points.</td>\n",
       "      <td>summary points d d d d d d d d d d</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>0.625000</td>\n",
       "      <td>{'rouge1': (0.16666666666666666, 1.0, 0.285714...</td>\n",
       "      <td>3.386982e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>Clear pronunciation is appreciated.</td>\n",
       "      <td>their position is completed d d d d d d d d d ...</td>\n",
       "      <td>4.750000</td>\n",
       "      <td>0.089286</td>\n",
       "      <td>{'rouge1': (0.05, 0.25, 0.08333333333333334), ...</td>\n",
       "      <td>-1.231788e+01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>187</th>\n",
       "      <td>Focal point.</td>\n",
       "      <td>focal point d d d d d d d d d</td>\n",
       "      <td>4.500000</td>\n",
       "      <td>0.646552</td>\n",
       "      <td>{'rouge1': (0.18181818181818182, 1.0, 0.307692...</td>\n",
       "      <td>-1.384532e+07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>406</th>\n",
       "      <td>Capital punishment.</td>\n",
       "      <td>people proficient in d d d d d d</td>\n",
       "      <td>4.500000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>{'rouge1': (0.0, 0.0, 0.0), 'rouge2': (0.0, 0....</td>\n",
       "      <td>6.463999e+29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>416</th>\n",
       "      <td>Former employers.</td>\n",
       "      <td>probably because d d d d d d</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>{'rouge1': (0.0, 0.0, 0.0), 'rouge2': (0.0, 0....</td>\n",
       "      <td>3.395386e+29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>369</th>\n",
       "      <td>I'm wearing shorts.</td>\n",
       "      <td>i'm really one ct c c c c c c c c c</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>0.125000</td>\n",
       "      <td>{'rouge1': (0.07692307692307693, 0.33333333333...</td>\n",
       "      <td>-8.875957e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>186</th>\n",
       "      <td>Public transportation.</td>\n",
       "      <td>public transportation d d d d d d d d</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>0.669643</td>\n",
       "      <td>{'rouge1': (0.2, 1.0, 0.33333333333333337), 'r...</td>\n",
       "      <td>-4.692108e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>You're boiling milk, ain't you?</td>\n",
       "      <td>you're pulling back at you d d d d d d d d d d...</td>\n",
       "      <td>3.800000</td>\n",
       "      <td>0.151515</td>\n",
       "      <td>{'rouge1': (0.09523809523809523, 0.4, 0.153846...</td>\n",
       "      <td>-8.574307e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>63</th>\n",
       "      <td>Intelligence jabbed at him accusingly.</td>\n",
       "      <td>inflation standing at him until d d d d d d d ...</td>\n",
       "      <td>3.800000</td>\n",
       "      <td>0.284091</td>\n",
       "      <td>{'rouge1': (0.09523809523809523, 0.4, 0.153846...</td>\n",
       "      <td>1.903165e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>275</th>\n",
       "      <td>Killer bee instinct.</td>\n",
       "      <td>killer b antic d d d d d d d d d</td>\n",
       "      <td>3.666667</td>\n",
       "      <td>0.128205</td>\n",
       "      <td>{'rouge1': (0.08333333333333333, 0.33333333333...</td>\n",
       "      <td>1.000000e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>178</th>\n",
       "      <td>Who sponsors you?</td>\n",
       "      <td>her pucker u d d d d d d d d</td>\n",
       "      <td>3.666667</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>{'rouge1': (0.0, 0.0, 0.0), 'rouge2': (0.0, 0....</td>\n",
       "      <td>-1.568277e+07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>469</th>\n",
       "      <td>Were thoroughbreds.</td>\n",
       "      <td>we opened a d d d d</td>\n",
       "      <td>3.500000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>{'rouge1': (0.0, 0.0, 0.0), 'rouge2': (0.0, 0....</td>\n",
       "      <td>-6.549143e+29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>79</th>\n",
       "      <td>Marksmanship example.</td>\n",
       "      <td>marksmanship example d d d d d d d</td>\n",
       "      <td>3.500000</td>\n",
       "      <td>0.694444</td>\n",
       "      <td>{'rouge1': (0.2222222222222222, 1.0, 0.3636363...</td>\n",
       "      <td>7.739731e+12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>This kid's froze bad.</td>\n",
       "      <td>this case was bad d d d d d d d d d d d d</td>\n",
       "      <td>3.500000</td>\n",
       "      <td>0.192308</td>\n",
       "      <td>{'rouge1': (0.125, 0.5, 0.2), 'rouge2': (0.0, ...</td>\n",
       "      <td>-6.854510e-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>878</th>\n",
       "      <td>Mystery movies.</td>\n",
       "      <td>literary movie d d d d d</td>\n",
       "      <td>3.500000</td>\n",
       "      <td>0.200000</td>\n",
       "      <td>{'rouge1': (0.14285714285714285, 0.5, 0.222222...</td>\n",
       "      <td>-4.925075e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>224</th>\n",
       "      <td>Contact points.</td>\n",
       "      <td>contact points d d d d d d d</td>\n",
       "      <td>3.500000</td>\n",
       "      <td>0.694444</td>\n",
       "      <td>{'rouge1': (0.2222222222222222, 1.0, 0.3636363...</td>\n",
       "      <td>8.707256e+05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>60</th>\n",
       "      <td>Yet we no longer feel uneasy.</td>\n",
       "      <td>that was no longer for any d d d d d d d d d d...</td>\n",
       "      <td>3.500000</td>\n",
       "      <td>0.243506</td>\n",
       "      <td>{'rouge1': (0.08695652173913043, 0.33333333333...</td>\n",
       "      <td>-3.087504e-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>366</th>\n",
       "      <td>Very well persuaded.</td>\n",
       "      <td>very well protected d d d d d d d d d</td>\n",
       "      <td>3.333333</td>\n",
       "      <td>0.480769</td>\n",
       "      <td>{'rouge1': (0.16666666666666666, 0.66666666666...</td>\n",
       "      <td>-5.711492e+00</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                            target_sentence  \\\n",
       "291                  Developing everything.   \n",
       "259                       Social arbitrage.   \n",
       "287                         Summary points.   \n",
       "28      Clear pronunciation is appreciated.   \n",
       "187                            Focal point.   \n",
       "406                     Capital punishment.   \n",
       "416                       Former employers.   \n",
       "369                     I'm wearing shorts.   \n",
       "186                  Public transportation.   \n",
       "23          You're boiling milk, ain't you?   \n",
       "63   Intelligence jabbed at him accusingly.   \n",
       "275                    Killer bee instinct.   \n",
       "178                       Who sponsors you?   \n",
       "469                     Were thoroughbreds.   \n",
       "79                    Marksmanship example.   \n",
       "44                    This kid's froze bad.   \n",
       "878                         Mystery movies.   \n",
       "224                         Contact points.   \n",
       "60            Yet we no longer feel uneasy.   \n",
       "366                    Very well persuaded.   \n",
       "\n",
       "                                         pred_sentence  WER_scores  \\\n",
       "291            well and everything d d d d d d d d d d    6.000000   \n",
       "259                    social fabric d d d d d d d d d    5.000000   \n",
       "287                 summary points d d d d d d d d d d    5.000000   \n",
       "28   their position is completed d d d d d d d d d ...    4.750000   \n",
       "187                      focal point d d d d d d d d d    4.500000   \n",
       "406                   people proficient in d d d d d d    4.500000   \n",
       "416                       probably because d d d d d d    4.000000   \n",
       "369                i'm really one ct c c c c c c c c c    4.000000   \n",
       "186              public transportation d d d d d d d d    4.000000   \n",
       "23   you're pulling back at you d d d d d d d d d d...    3.800000   \n",
       "63   inflation standing at him until d d d d d d d ...    3.800000   \n",
       "275                   killer b antic d d d d d d d d d    3.666667   \n",
       "178                       her pucker u d d d d d d d d    3.666667   \n",
       "469                                we opened a d d d d    3.500000   \n",
       "79                  marksmanship example d d d d d d d    3.500000   \n",
       "44           this case was bad d d d d d d d d d d d d    3.500000   \n",
       "878                           literary movie d d d d d    3.500000   \n",
       "224                       contact points d d d d d d d    3.500000   \n",
       "60   that was no longer for any d d d d d d d d d d...    3.500000   \n",
       "366              very well protected d d d d d d d d d    3.333333   \n",
       "\n",
       "     METEOR_scores                                       ROUGE_scores  \\\n",
       "291       0.161290  {'rouge1': (0.07692307692307693, 0.5, 0.133333...   \n",
       "259       0.172414  {'rouge1': (0.09090909090909091, 0.5, 0.153846...   \n",
       "287       0.625000  {'rouge1': (0.16666666666666666, 1.0, 0.285714...   \n",
       "28        0.089286  {'rouge1': (0.05, 0.25, 0.08333333333333334), ...   \n",
       "187       0.646552  {'rouge1': (0.18181818181818182, 1.0, 0.307692...   \n",
       "406       0.000000  {'rouge1': (0.0, 0.0, 0.0), 'rouge2': (0.0, 0....   \n",
       "416       0.000000  {'rouge1': (0.0, 0.0, 0.0), 'rouge2': (0.0, 0....   \n",
       "369       0.125000  {'rouge1': (0.07692307692307693, 0.33333333333...   \n",
       "186       0.669643  {'rouge1': (0.2, 1.0, 0.33333333333333337), 'r...   \n",
       "23        0.151515  {'rouge1': (0.09523809523809523, 0.4, 0.153846...   \n",
       "63        0.284091  {'rouge1': (0.09523809523809523, 0.4, 0.153846...   \n",
       "275       0.128205  {'rouge1': (0.08333333333333333, 0.33333333333...   \n",
       "178       0.000000  {'rouge1': (0.0, 0.0, 0.0), 'rouge2': (0.0, 0....   \n",
       "469       0.000000  {'rouge1': (0.0, 0.0, 0.0), 'rouge2': (0.0, 0....   \n",
       "79        0.694444  {'rouge1': (0.2222222222222222, 1.0, 0.3636363...   \n",
       "44        0.192308  {'rouge1': (0.125, 0.5, 0.2), 'rouge2': (0.0, ...   \n",
       "878       0.200000  {'rouge1': (0.14285714285714285, 0.5, 0.222222...   \n",
       "224       0.694444  {'rouge1': (0.2222222222222222, 1.0, 0.3636363...   \n",
       "60        0.243506  {'rouge1': (0.08695652173913043, 0.33333333333...   \n",
       "366       0.480769  {'rouge1': (0.16666666666666666, 0.66666666666...   \n",
       "\n",
       "     BERTScore_F1_scores  \n",
       "291         3.762290e+06  \n",
       "259         1.000000e+00  \n",
       "287         3.386982e+06  \n",
       "28         -1.231788e+01  \n",
       "187        -1.384532e+07  \n",
       "406         6.463999e+29  \n",
       "416         3.395386e+29  \n",
       "369        -8.875957e+00  \n",
       "186        -4.692108e+06  \n",
       "23         -8.574307e+00  \n",
       "63          1.903165e+00  \n",
       "275         1.000000e+00  \n",
       "178        -1.568277e+07  \n",
       "469        -6.549143e+29  \n",
       "79          7.739731e+12  \n",
       "44         -6.854510e-01  \n",
       "878        -4.925075e+00  \n",
       "224         8.707256e+05  \n",
       "60         -3.087504e-01  \n",
       "366        -5.711492e+00  "
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "results_df.sort_values(\"WER_scores\", ascending=False).head(20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dc373308",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>target_sentence</th>\n",
       "      <th>pred_sentence</th>\n",
       "      <th>WER_scores</th>\n",
       "      <th>METEOR_scores</th>\n",
       "      <th>ROUGE_scores</th>\n",
       "      <th>BERTScore_F1_scores</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>113</th>\n",
       "      <td>Just saying that clear vision is a necessity.</td>\n",
       "      <td>just saying that clear vision is a necessity</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.999023</td>\n",
       "      <td>{'rouge1': (1.0, 1.0, 1.0), 'rouge2': (1.0, 1....</td>\n",
       "      <td>3.787241e+12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>253</th>\n",
       "      <td>So it was really too late to do much.</td>\n",
       "      <td>so it was really too late to do much</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.999314</td>\n",
       "      <td>{'rouge1': (1.0, 1.0, 1.0), 'rouge2': (1.0, 1....</td>\n",
       "      <td>-1.201724e+06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>669</th>\n",
       "      <td>Are they all here in the area?</td>\n",
       "      <td>are they all here in the area</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.998542</td>\n",
       "      <td>{'rouge1': (1.0, 1.0, 1.0), 'rouge2': (1.0, 1....</td>\n",
       "      <td>-2.009376e+13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>194</th>\n",
       "      <td>The best teachers and the best students.</td>\n",
       "      <td>the best teachers and the best students</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.998542</td>\n",
       "      <td>{'rouge1': (1.0, 1.0, 1.0), 'rouge2': (1.0, 1....</td>\n",
       "      <td>-4.925075e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>231</th>\n",
       "      <td>I'm away from my other son during those hours.</td>\n",
       "      <td>i'm away from my other son during those hours</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.999314</td>\n",
       "      <td>{'rouge1': (1.0, 1.0, 1.0), 'rouge2': (1.0, 1....</td>\n",
       "      <td>9.039256e+05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>227</th>\n",
       "      <td>They fell out because of the drug issue.</td>\n",
       "      <td>they fell out because of the drug issue</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.999023</td>\n",
       "      <td>{'rouge1': (1.0, 1.0, 1.0), 'rouge2': (1.0, 1....</td>\n",
       "      <td>3.001747e+05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>252</th>\n",
       "      <td>What you need is a tougher punishment regime.</td>\n",
       "      <td>what you need is a tougher punishment regime</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.999023</td>\n",
       "      <td>{'rouge1': (1.0, 1.0, 1.0), 'rouge2': (1.0, 1....</td>\n",
       "      <td>5.447189e+04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>251</th>\n",
       "      <td>Pass me that joint bank account statement.</td>\n",
       "      <td>pass me that joint bank account statement</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.998542</td>\n",
       "      <td>{'rouge1': (1.0, 1.0, 1.0), 'rouge2': (1.0, 1....</td>\n",
       "      <td>-2.821577e+05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>250</th>\n",
       "      <td>Let's see about getting parents more involved.</td>\n",
       "      <td>let's see about getting parents more involved</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.998542</td>\n",
       "      <td>{'rouge1': (1.0, 1.0, 1.0), 'rouge2': (1.0, 1....</td>\n",
       "      <td>5.160736e+05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>638</th>\n",
       "      <td>Trump has said there was no collusion.</td>\n",
       "      <td>trump has said there was no collusion</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.998542</td>\n",
       "      <td>{'rouge1': (1.0, 1.0, 1.0), 'rouge2': (1.0, 1....</td>\n",
       "      <td>-4.925075e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>244</th>\n",
       "      <td>And just keep chipping away at the ice.</td>\n",
       "      <td>and just keep chipping away at the ice</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.999023</td>\n",
       "      <td>{'rouge1': (1.0, 1.0, 1.0), 'rouge2': (1.0, 1....</td>\n",
       "      <td>1.434991e+05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>241</th>\n",
       "      <td>Is there work to support this conclusion?</td>\n",
       "      <td>is there work to support this conclusion</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.998542</td>\n",
       "      <td>{'rouge1': (1.0, 1.0, 1.0), 'rouge2': (1.0, 1....</td>\n",
       "      <td>-3.689665e+05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>240</th>\n",
       "      <td>I only saw the end of the first quarter.</td>\n",
       "      <td>i only saw the end of the first quarter</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.999314</td>\n",
       "      <td>{'rouge1': (1.0, 1.0, 1.0), 'rouge2': (1.0, 1....</td>\n",
       "      <td>-7.734196e+05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>239</th>\n",
       "      <td>I do that when we travel.</td>\n",
       "      <td>i do that when we travel</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.997685</td>\n",
       "      <td>{'rouge1': (1.0, 1.0, 1.0), 'rouge2': (1.0, 1....</td>\n",
       "      <td>8.813053e+05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>237</th>\n",
       "      <td>Pass me that joint bank account statement.</td>\n",
       "      <td>pass me that joint bank account statement</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.998542</td>\n",
       "      <td>{'rouge1': (1.0, 1.0, 1.0), 'rouge2': (1.0, 1....</td>\n",
       "      <td>2.290312e+05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>450</th>\n",
       "      <td>I really would like to see them do well.</td>\n",
       "      <td>i really would like to see them do well</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.999314</td>\n",
       "      <td>{'rouge1': (1.0, 1.0, 1.0), 'rouge2': (1.0, 1....</td>\n",
       "      <td>1.000000e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>749</th>\n",
       "      <td>To grow up and to decide what they want to do.</td>\n",
       "      <td>to grow up and to decide what they want to do</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.999624</td>\n",
       "      <td>{'rouge1': (1.0, 1.0, 1.0), 'rouge2': (1.0, 1....</td>\n",
       "      <td>-4.925075e+00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>658</th>\n",
       "      <td>That would be a really good thing to do.</td>\n",
       "      <td>that would be a really good thing to do</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.999314</td>\n",
       "      <td>{'rouge1': (1.0, 1.0, 1.0), 'rouge2': (1.0, 1....</td>\n",
       "      <td>-2.142541e+08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>643</th>\n",
       "      <td>That would have been best time to go.</td>\n",
       "      <td>that would have been best time to go</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.999023</td>\n",
       "      <td>{'rouge1': (1.0, 1.0, 1.0), 'rouge2': (1.0, 1....</td>\n",
       "      <td>1.501731e+11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>246</th>\n",
       "      <td>My husband has a tool shed out back.</td>\n",
       "      <td>my husband has a tool shed out back</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.999023</td>\n",
       "      <td>{'rouge1': (1.0, 1.0, 1.0), 'rouge2': (1.0, 1....</td>\n",
       "      <td>-9.357001e+05</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                    target_sentence  \\\n",
       "113   Just saying that clear vision is a necessity.   \n",
       "253           So it was really too late to do much.   \n",
       "669                  Are they all here in the area?   \n",
       "194        The best teachers and the best students.   \n",
       "231  I'm away from my other son during those hours.   \n",
       "227        They fell out because of the drug issue.   \n",
       "252   What you need is a tougher punishment regime.   \n",
       "251      Pass me that joint bank account statement.   \n",
       "250  Let's see about getting parents more involved.   \n",
       "638          Trump has said there was no collusion.   \n",
       "244         And just keep chipping away at the ice.   \n",
       "241       Is there work to support this conclusion?   \n",
       "240        I only saw the end of the first quarter.   \n",
       "239                       I do that when we travel.   \n",
       "237      Pass me that joint bank account statement.   \n",
       "450        I really would like to see them do well.   \n",
       "749  To grow up and to decide what they want to do.   \n",
       "658        That would be a really good thing to do.   \n",
       "643           That would have been best time to go.   \n",
       "246            My husband has a tool shed out back.   \n",
       "\n",
       "                                     pred_sentence  WER_scores  METEOR_scores  \\\n",
       "113   just saying that clear vision is a necessity         0.0       0.999023   \n",
       "253           so it was really too late to do much         0.0       0.999314   \n",
       "669                  are they all here in the area         0.0       0.998542   \n",
       "194        the best teachers and the best students         0.0       0.998542   \n",
       "231  i'm away from my other son during those hours         0.0       0.999314   \n",
       "227        they fell out because of the drug issue         0.0       0.999023   \n",
       "252   what you need is a tougher punishment regime         0.0       0.999023   \n",
       "251      pass me that joint bank account statement         0.0       0.998542   \n",
       "250  let's see about getting parents more involved         0.0       0.998542   \n",
       "638          trump has said there was no collusion         0.0       0.998542   \n",
       "244         and just keep chipping away at the ice         0.0       0.999023   \n",
       "241       is there work to support this conclusion         0.0       0.998542   \n",
       "240        i only saw the end of the first quarter         0.0       0.999314   \n",
       "239                       i do that when we travel         0.0       0.997685   \n",
       "237      pass me that joint bank account statement         0.0       0.998542   \n",
       "450        i really would like to see them do well         0.0       0.999314   \n",
       "749  to grow up and to decide what they want to do         0.0       0.999624   \n",
       "658        that would be a really good thing to do         0.0       0.999314   \n",
       "643           that would have been best time to go         0.0       0.999023   \n",
       "246            my husband has a tool shed out back         0.0       0.999023   \n",
       "\n",
       "                                          ROUGE_scores  BERTScore_F1_scores  \n",
       "113  {'rouge1': (1.0, 1.0, 1.0), 'rouge2': (1.0, 1....         3.787241e+12  \n",
       "253  {'rouge1': (1.0, 1.0, 1.0), 'rouge2': (1.0, 1....        -1.201724e+06  \n",
       "669  {'rouge1': (1.0, 1.0, 1.0), 'rouge2': (1.0, 1....        -2.009376e+13  \n",
       "194  {'rouge1': (1.0, 1.0, 1.0), 'rouge2': (1.0, 1....        -4.925075e+00  \n",
       "231  {'rouge1': (1.0, 1.0, 1.0), 'rouge2': (1.0, 1....         9.039256e+05  \n",
       "227  {'rouge1': (1.0, 1.0, 1.0), 'rouge2': (1.0, 1....         3.001747e+05  \n",
       "252  {'rouge1': (1.0, 1.0, 1.0), 'rouge2': (1.0, 1....         5.447189e+04  \n",
       "251  {'rouge1': (1.0, 1.0, 1.0), 'rouge2': (1.0, 1....        -2.821577e+05  \n",
       "250  {'rouge1': (1.0, 1.0, 1.0), 'rouge2': (1.0, 1....         5.160736e+05  \n",
       "638  {'rouge1': (1.0, 1.0, 1.0), 'rouge2': (1.0, 1....        -4.925075e+00  \n",
       "244  {'rouge1': (1.0, 1.0, 1.0), 'rouge2': (1.0, 1....         1.434991e+05  \n",
       "241  {'rouge1': (1.0, 1.0, 1.0), 'rouge2': (1.0, 1....        -3.689665e+05  \n",
       "240  {'rouge1': (1.0, 1.0, 1.0), 'rouge2': (1.0, 1....        -7.734196e+05  \n",
       "239  {'rouge1': (1.0, 1.0, 1.0), 'rouge2': (1.0, 1....         8.813053e+05  \n",
       "237  {'rouge1': (1.0, 1.0, 1.0), 'rouge2': (1.0, 1....         2.290312e+05  \n",
       "450  {'rouge1': (1.0, 1.0, 1.0), 'rouge2': (1.0, 1....         1.000000e+00  \n",
       "749  {'rouge1': (1.0, 1.0, 1.0), 'rouge2': (1.0, 1....        -4.925075e+00  \n",
       "658  {'rouge1': (1.0, 1.0, 1.0), 'rouge2': (1.0, 1....        -2.142541e+08  \n",
       "643  {'rouge1': (1.0, 1.0, 1.0), 'rouge2': (1.0, 1....         1.501731e+11  \n",
       "246  {'rouge1': (1.0, 1.0, 1.0), 'rouge2': (1.0, 1....        -9.357001e+05  "
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "results_df.sort_values(\"WER_scores\", ascending=False).tail(20)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "LanguageModelDecoder",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
