<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>Financial Model Performance</title>
    <!-- Tailwind Play CDN: supplies the utility classes used throughout the body. -->
    <script src="https://cdn.tailwindcss.com"></script>
    <link rel="stylesheet" href="../css/tables.css" />
    <!-- Warm up the connections to the Google Fonts hosts before the icon font is requested. -->
    <link rel="preconnect" href="https://fonts.googleapis.com" />
    <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
    <!-- Material Symbols Outlined, restricted to the single open_in_new glyph via icon_names. -->
    <link
      rel="stylesheet"
      href="https://fonts.googleapis.com/css2?family=Material+Symbols+Outlined:opsz,wght,FILL,GRAD@20..48,100..700,0..1,-50..200&icon_names=open_in_new"
    />
    <style>
      /* Small, superscripted, grey icon rendered after the text of each link. */
      .material-symbols-outlined {
        font-size: 14px;
        vertical-align: super;
        color: #4a5568;
      }

      /* Same colour cue for pointer hover and keyboard focus. */
      a:hover .material-symbols-outlined,
      a:focus-visible .material-symbols-outlined {
        color: #2b6cb0;
      }
    </style>
  </head>
  <body class="text-gray-900">
    <div class="max-w-7xl mx-auto">
      <!--
        Legend for the score cells: each label uses the same class
        (accuracy-score, f1-score, bertscore-f1) as the matching values inside
        the table body.
        NOTE(review): the visual styling of these three classes is presumably
        defined in ../css/tables.css; confirm.
      -->
      <p class="text-sm text-gray-500 mb-6">
        Scores: <span class="accuracy-score font-semibold">Accuracy</span> /
        <span class="f1-score font-semibold">F1 Score</span> /
        <span class="bertscore-f1 font-semibold">BERTScore (F1)</span>.
      </p>

      <div class="overflow-x-auto rounded-lg table-container">
        <table class="w-full text-sm text-left whitespace-nowrap">
          <!-- Visually hidden caption (Tailwind sr-only) so assistive technology can announce what the table contains. -->
          <caption class="sr-only">
            Accuracy, F1 and BERTScore (F1) results for base and fine-tuned
            models on financial tasks
          </caption>
          <!--
            Column headers. Each task header is followed by an icon-only link:
            the glyph is hidden from assistive technology and the link carries
            an explicit accessible name instead of the ligature text.
            NOTE(review): every href="#" below is a placeholder with no real
            destination yet; replace with the actual URL when available.
          -->
          <thead
            class="bg-gray-200 text-xs text-gray-600 uppercase tracking-wider"
          >
            <tr>
              <th scope="col" class="px-2 py-2 font-semibold text-left">
                Model Grouping
              </th>
              <th scope="col" class="px-2 py-2 font-semibold text-left">
                Model Name
              </th>
              <th scope="col" class="px-2 py-2 font-semibold text-center">
                Tag Extraction<a
                  href="#"
                  target="_blank"
                  rel="noopener noreferrer"
                  aria-label="Tag Extraction (opens in new tab)"
                  ><span class="material-symbols-outlined" aria-hidden="true"
                    >open_in_new</span
                  ></a
                >
              </th>
              <th scope="col" class="px-2 py-2 font-semibold text-center">
                Value Extraction<a
                  href="#"
                  target="_blank"
                  rel="noopener noreferrer"
                  aria-label="Value Extraction (opens in new tab)"
                  ><span class="material-symbols-outlined" aria-hidden="true"
                    >open_in_new</span
                  ></a
                >
              </th>
              <th scope="col" class="px-2 py-2 font-semibold text-center">
                Formula Construction<a
                  href="#"
                  target="_blank"
                  rel="noopener noreferrer"
                  aria-label="Formula Construction (opens in new tab)"
                  ><span class="material-symbols-outlined" aria-hidden="true"
                    >open_in_new</span
                  ></a
                >
              </th>
              <th scope="col" class="px-2 py-2 font-semibold text-center">
                Formula Calculation<a
                  href="#"
                  target="_blank"
                  rel="noopener noreferrer"
                  aria-label="Formula Calculation (opens in new tab)"
                  ><span class="material-symbols-outlined" aria-hidden="true"
                    >open_in_new</span
                  ></a
                >
              </th>
              <th scope="col" class="px-2 py-2 font-semibold text-center">
                FinanceBench<a
                  href="https://arxiv.org/abs/2311.11944"
                  target="_blank"
                  rel="noopener noreferrer"
                  aria-label="FinanceBench paper on arXiv (opens in new tab)"
                  ><span class="material-symbols-outlined" aria-hidden="true"
                    >open_in_new</span
                  ></a
                >
              </th>
              <th scope="col" class="px-2 py-2 font-semibold text-center">
                Financial Math<a
                  href="#"
                  target="_blank"
                  rel="noopener noreferrer"
                  aria-label="Financial Math (opens in new tab)"
                  ><span class="material-symbols-outlined" aria-hidden="true"
                    >open_in_new</span
                  ></a
                >
              </th>
            </tr>
          </thead>
          <tbody class="divide-y divide-gray-200">
            <!--
              Financial Models Group: a single placeholder row. The model name
              cell holds the literal text "Model" and every score cell shows
              "-", i.e. no results are listed for this group.
            -->
            <tr class="bg-gray-50">
              <td class="px-2 py-2 font-bold align-top" rowspan="1">
                Financial Models
              </td>
              <td class="px-2 py-2 font-semibold text-gray-400">Model</td>
              <td class="px-2 py-2 text-center">-</td>
              <td class="px-2 py-2 text-center">-</td>
              <td class="px-2 py-2 text-center">-</td>
              <td class="px-2 py-2 text-center">-</td>
              <td class="px-2 py-2 text-center">-</td>
              <td class="px-2 py-2 text-center">-</td>
            </tr>

            <!--
              Base Models Group: five rows; the group label cell in this first
              row spans all of them. Score cells show accuracy above F1, except
              the FinanceBench column, which shows BERTScore (F1) only.
              The model link is icon-only, so it carries an aria-label and the
              glyph is hidden from assistive technology.
            -->
            <tr class="bg-white">
              <td class="px-2 py-2 font-bold align-top" rowspan="5">
                Base Models
              </td>
              <td class="px-2 py-2 font-semibold">
                Llama 3.1 8B<a
                  href="https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct"
                  target="_blank"
                  rel="noopener noreferrer"
                  aria-label="Llama 3.1 8B on Hugging Face (opens in new tab)"
                  ><span class="material-symbols-outlined" aria-hidden="true"
                    >open_in_new</span
                  ></a
                >
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score">69.16</span><br /><span
                  class="f1-score"
                  >0.739</span
                >
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score">52.46</span><br /><span
                  class="f1-score"
                  >0.565</span
                >
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score">12.92</span><br /><span
                  class="f1-score"
                  >0.201</span
                >
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score">27.27</span><br /><span
                  class="f1-score"
                  >0.317</span
                >
              </td>
              <td class="px-2 py-2 text-center">
                <span class="bertscore-f1">0.443</span>
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score">11.00</span><br /><span
                  class="f1-score"
                  >0.136</span
                >
              </td>
            </tr>
            <!-- Base model row: Llama 3.1 70B. Icon-only link gets an explicit accessible name and rel protection. -->
            <tr class="bg-white">
              <td class="px-2 py-2 font-semibold">
                Llama 3.1 70B<a
                  href="https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct"
                  target="_blank"
                  rel="noopener noreferrer"
                  aria-label="Llama 3.1 70B on Hugging Face (opens in new tab)"
                  ><span class="material-symbols-outlined" aria-hidden="true"
                    >open_in_new</span
                  ></a
                >
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score">69.64</span><br /><span
                  class="f1-score"
                  >0.782</span
                >
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score">88.19</span><br /><span
                  class="f1-score"
                  >0.904</span
                >
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score">59.28</span><br /><span
                  class="f1-score"
                  >0.665</span
                >
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score">77.49</span><br /><span
                  class="f1-score"
                  >0.783</span
                >
              </td>
              <td class="px-2 py-2 text-center">
                <span class="bertscore-f1">0.528</span>
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score">10.50</span><br /><span
                  class="f1-score"
                  >0.134</span
                >
              </td>
            </tr>
            <!-- Base model row: DeepSeek V3. Icon-only link gets an explicit accessible name and rel protection. -->
            <tr class="bg-white">
              <td class="px-2 py-2 font-semibold">
                DeepSeek V3<a
                  href="https://github.com/deepseek-ai/DeepSeek-V3"
                  target="_blank"
                  rel="noopener noreferrer"
                  aria-label="DeepSeek V3 on GitHub (opens in new tab)"
                  ><span class="material-symbols-outlined" aria-hidden="true"
                    >open_in_new</span
                  ></a
                >
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score">85.03</span><br /><span
                  class="f1-score"
                  >0.849</span
                >
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score">98.01</span><br /><span
                  class="f1-score"
                  >0.982</span
                >
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score">22.75</span><br /><span
                  class="f1-score"
                  >0.315</span
                >
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score">85.99</span><br /><span
                  class="f1-score"
                  >0.868</span
                >
              </td>
              <td class="px-2 py-2 text-center">
                <span class="bertscore-f1">0.573</span>
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score">21.50</span><br /><span
                  class="f1-score"
                  >0.255</span
                >
              </td>
            </tr>
            <!-- Base model row: GPT-4o. Icon-only link gets an explicit accessible name and rel protection. -->
            <tr class="bg-white">
              <td class="px-2 py-2 font-semibold">
                GPT-4o<a
                  href="https://openai.com/index/hello-gpt-4o"
                  target="_blank"
                  rel="noopener noreferrer"
                  aria-label="GPT-4o on openai.com (opens in new tab)"
                  ><span class="material-symbols-outlined" aria-hidden="true"
                    >open_in_new</span
                  ></a
                >
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score">81.60</span><br /><span
                  class="f1-score"
                  >0.864</span
                >
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score">97.01</span><br /><span
                  class="f1-score"
                  >0.974</span
                >
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score">79.76</span><br /><span
                  class="f1-score"
                  >0.820</span
                >
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score">83.59</span><br /><span
                  class="f1-score"
                  >0.857</span
                >
              </td>
              <td class="px-2 py-2 text-center">
                <span class="bertscore-f1">0.564</span>
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score">27.00</span><br /><span
                  class="f1-score"
                  >0.296</span
                >
              </td>
            </tr>
            <!-- Base model row: Gemini 2.0 FL. Icon-only link gets an explicit accessible name and rel protection. -->
            <tr class="bg-white">
              <td class="px-2 py-2 font-semibold">
                Gemini 2.0 FL<a
                  href="https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-0-flash-lite"
                  target="_blank"
                  rel="noopener noreferrer"
                  aria-label="Gemini 2.0 FL on Google Cloud (opens in new tab)"
                  ><span class="material-symbols-outlined" aria-hidden="true"
                    >open_in_new</span
                  ></a
                >
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score">80.27</span><br /><span
                  class="f1-score"
                  >0.811</span
                >
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score">98.02</span><br /><span
                  class="f1-score"
                  >0.980</span
                >
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score">61.90</span><br /><span
                  class="f1-score"
                  >0.644</span
                >
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score">53.57</span><br /><span
                  class="f1-score"
                  >0.536</span
                >
              </td>
              <td class="px-2 py-2 text-center">
                <span class="bertscore-f1">0.552</span>
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score">19.00</span><br /><span
                  class="f1-score"
                  >0.204</span
                >
              </td>
            </tr>

            <!--
              Fine-tuned Models Group: five rows; the group label cell in this
              first row spans all of them. Bold values are the highest score in
              their column across the table.
              The model link is an anonymised placeholder: the handler shows the
              notice and returns false so the browser does not also open a new
              tab on "#".
            -->
            <tr class="bg-gray-50">
              <td class="px-2 py-2 font-bold align-top" rowspan="5">
                Fine-tuned Models
              </td>
              <td class="px-2 py-2 font-semibold">
                Llama 3.1 8B LoRA<a
                  href="#"
                  onclick="alert('Links removed for anonymous review'); return false;"
                  target="_blank"
                  rel="noopener noreferrer"
                  aria-label="Llama 3.1 8B LoRA model link (removed for anonymous review)"
                  ><span class="material-symbols-outlined" aria-hidden="true"
                    >open_in_new</span
                  ></a
                >
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score"><b>89.13</b></span
                ><br /><span class="f1-score">0.886</span>
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score">98.49</span><br /><span
                  class="f1-score"
                  >0.986</span
                >
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score">77.61</span><br /><span
                  class="f1-score"
                  >0.876</span
                >
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score">98.68</span><br /><span
                  class="f1-score"
                  >0.990</span
                >
              </td>
              <td class="px-2 py-2 text-center">
                <span class="bertscore-f1">0.511</span>
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score">30.00</span><br /><span
                  class="f1-score"
                  >0.332</span
                >
              </td>
            </tr>
            <!--
              Fine-tuned row: Llama 3.1 8B QLoRA. The link is an anonymised
              placeholder; the handler returns false so the alert is not
              followed by a new tab opening on "#".
            -->
            <tr class="bg-gray-50">
              <td class="px-2 py-2 font-semibold">
                Llama 3.1 8B QLoRA<a
                  href="#"
                  onclick="alert('Links removed for anonymous review'); return false;"
                  target="_blank"
                  rel="noopener noreferrer"
                  aria-label="Llama 3.1 8B QLoRA model link (removed for anonymous review)"
                  ><span class="material-symbols-outlined" aria-hidden="true"
                    >open_in_new</span
                  ></a
                >
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score">86.89</span><br /><span
                  class="f1-score"
                  >0.872</span
                >
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score">97.14</span><br /><span
                  class="f1-score"
                  >0.974</span
                >
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score">89.34</span><br /><span
                  class="f1-score"
                  ><b>0.898</b></span
                >
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score">92.81</span><br /><span
                  class="f1-score"
                  >0.947</span
                >
              </td>
              <td class="px-2 py-2 text-center">
                <span class="bertscore-f1">0.542</span>
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score">26.50</span><br /><span
                  class="f1-score"
                  >0.307</span
                >
              </td>
            </tr>
            <!--
              Fine-tuned row: Llama 3.1 8B DoRA. The link is an anonymised
              placeholder; the handler returns false so the alert is not
              followed by a new tab opening on "#".
            -->
            <tr class="bg-gray-50">
              <td class="px-2 py-2 font-semibold">
                Llama 3.1 8B DoRA<a
                  href="#"
                  onclick="alert('Links removed for anonymous review'); return false;"
                  target="_blank"
                  rel="noopener noreferrer"
                  aria-label="Llama 3.1 8B DoRA model link (removed for anonymous review)"
                  ><span class="material-symbols-outlined" aria-hidden="true"
                    >open_in_new</span
                  ></a
                >
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score">80.44</span><br /><span
                  class="f1-score"
                  >0.896</span
                >
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score">98.57</span><br /><span
                  class="f1-score"
                  >0.988</span
                >
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score">88.02</span><br /><span
                  class="f1-score"
                  >0.882</span
                >
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score"><b>98.92</b></span
                ><br /><span class="f1-score"><b>0.993</b></span>
              </td>
              <td class="px-2 py-2 text-center">
                <span class="bertscore-f1">0.477</span>
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score">28.50</span><br /><span
                  class="f1-score"
                  >0.317</span
                >
              </td>
            </tr>
            <!--
              Fine-tuned row: Llama 3.1 8B rsLoRA. The link is an anonymised
              placeholder; the handler returns false so the alert is not
              followed by a new tab opening on "#".
            -->
            <tr class="bg-gray-50">
              <td class="px-2 py-2 font-semibold">
                Llama 3.1 8B rsLoRA<a
                  href="#"
                  onclick="alert('Links removed for anonymous review'); return false;"
                  target="_blank"
                  rel="noopener noreferrer"
                  aria-label="Llama 3.1 8B rsLoRA model link (removed for anonymous review)"
                  ><span class="material-symbols-outlined" aria-hidden="true"
                    >open_in_new</span
                  ></a
                >
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score">85.26</span><br /><span
                  class="f1-score"
                  >0.879</span
                >
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score">99.13</span><br /><span
                  class="f1-score"
                  ><b>0.992</b></span
                >
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score"><b>89.46</b></span
                ><br /><span class="f1-score">0.893</span>
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score">98.80</span><br /><span
                  class="f1-score"
                  >0.988</span
                >
              </td>
              <td class="px-2 py-2 text-center">
                <span class="bertscore-f1"><b>0.575</b></span>
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score">34.50</span><br /><span
                  class="f1-score"
                  >0.370</span
                >
              </td>
            </tr>
            <!--
              Last fine-tuned row ("Gemini 2.0 FL N/A"); it has no model link.
              Each score cell is written on one line: accuracy, line break, F1.
              The fifth score cell holds the BERTScore (F1) value only.
            -->
            <tr class="bg-gray-50">
              <td class="px-2 py-2 font-semibold">Gemini 2.0 FL N/A</td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score">85.03</span><br /><span class="f1-score"><b>0.907</b></span>
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score"><b>99.20</b></span><br /><span class="f1-score"><b>0.992</b></span>
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score">67.85</span><br /><span class="f1-score">0.786</span>
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score">54.76</span><br /><span class="f1-score">0.548</span>
              </td>
              <td class="px-2 py-2 text-center">
                <span class="bertscore-f1">0.544</span>
              </td>
              <td class="px-2 py-2 text-center">
                <span class="accuracy-score"><b>66.00</b></span><br /><span class="f1-score"><b>0.785</b></span>
              </td>
            </tr>
          </tbody>
        </table>
      </div>
    </div>
  </body>
</html>
