<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>The Cost of Thinking</title>
<!-- Web fonts: loaded via <link> (with preconnect) instead of a CSS @import so the
     browser can discover and fetch the font stylesheet while parsing the head. -->
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:wght@400;600;700&amp;family=Inter:wght@400;600;700&amp;display=swap">
<style>
  /* ---- Page shell: full-viewport dark canvas; the page itself never scrolls ---- */
  html, body { margin: 0; padding: 0; width: 100%; height: 100%; font-family: 'Inter', sans-serif; background: #0f1117; color: #e0e0e0; overflow: hidden; }
  .dashboard { width: 100%; height: 100%; display: flex; flex-direction: column; }

  /* ---- Top bar: title plus the model / task pill groups ---- */
  .top-bar { padding: 8px 16px; border-bottom: 1px solid #2a2d3a; display: flex; align-items: center; gap: 10px; flex-wrap: wrap; flex-shrink: 0; }
  .top-bar h2 { margin: 0; font-size: 14px; color: #c8cad4; white-space: nowrap; }
  .sep { width: 1px; height: 20px; background: #2a2d3a; flex-shrink: 0; }
  .bar-label { font-size: 10px; color: #6b7080; text-transform: uppercase; letter-spacing: 0.8px; }
  .bar-group { display: flex; gap: 4px; align-items: center; }
  .pill { padding: 4px 12px; border: 1px solid #2a2d3a; border-radius: 14px; background: #1e2030; color: #8b8fa3; font-size: 11px; font-weight: 600; cursor: pointer; transition: all 0.2s; font-family: 'Inter', sans-serif; white-space: nowrap; }
  .pill:hover { background: #2a2d3a; color: #fff; }
  .pill.active { background: #3b3f54; color: #fff; border-color: #4e79a7; }

  /* ---- Main grid: two stat cards, then full-width meters, then prompt cards ----
     overflow is `auto` (not `hidden`): html/body already suppress page scrolling,
     so this is the only place content can scroll when the viewport is too short. */
  .main { flex: 1; padding: 10px 16px; display: grid; grid-template-columns: 1fr 1fr; grid-template-rows: auto auto 1fr; gap: 8px; overflow: auto; min-height: 0; }

  /* ---- Stat cards ---- */
  .stat-card { background: #1a1c2e; border-radius: 10px; padding: 10px 14px; border: 1px solid #2a2d3a; }
  .stat-label { font-size: 10px; color: #6b7080; text-transform: uppercase; letter-spacing: 0.8px; }
  .stat-value { font-size: 28px; font-weight: 700; font-family: 'IBM Plex Mono', monospace; margin-top: 2px; transition: all 0.5s; }
  .stat-sub { font-size: 11px; color: #6b7080; margin-top: 1px; }
  .stat-positive { color: #4ade80; }
  .stat-negative { color: #f87171; }

  /* ---- Accuracy meters (fill width is set inline / from script) ---- */
  .meter-area { grid-column: 1/-1; display: flex; flex-direction: column; gap: 6px; }
  .meter { height: 26px; border-radius: 6px; background: #1a1c2e; overflow: hidden; position: relative; }
  .meter-fill { height: 100%; border-radius: 6px; transition: width 0.8s ease; display: flex; align-items: center; padding-left: 10px; font-size: 12px; font-weight: 600; color: #fff; }
  .meter-label { font-size: 11px; color: #8b8fa3; margin-bottom: 2px; display: flex; justify-content: space-between; }

  /* ---- Prompt strategy cards (column count is set from script) ---- */
  .prompt-row { grid-column: 1/-1; display: grid; gap: 6px; align-content: start; }
  .prompt-card { background: #1a1c2e; border-radius: 8px; padding: 8px 10px; border: 1px solid #2a2d3a; text-align: center; transition: all 0.5s; }
  .prompt-card .p-name { font-size: 11px; font-weight: 600; color: #8b8fa3; }
  .prompt-card .p-acc { font-size: 20px; font-weight: 700; font-family: 'IBM Plex Mono', monospace; margin-top: 2px; }
  .prompt-card .p-flips { font-size: 10px; color: #6b7080; margin-top: 1px; }
  .best-tag { display: inline-block; font-size: 9px; background: #4ade8033; color: #4ade80; padding: 1px 6px; border-radius: 8px; margin-top: 2px; }
  .prompt-title { font-size: 10px; color: #6b7080; text-transform: uppercase; letter-spacing: 0.8px; grid-column: 1/-1; margin: 0; }
</style>
</head>
<body>
<div class="dashboard">
  <!-- Control bar: page title plus the model and task selectors.
       The script binds click handlers to every .pill inside #modelBar / #taskBar
       and reads the choice from data-m / data-t. -->
  <header class="top-bar">
    <h2>The Cost of Thinking</h2>
    <div class="sep"></div>
    <span class="bar-label" id="modelBarLabel">Model</span>
    <div class="bar-group" id="modelBar" role="group" aria-labelledby="modelBarLabel">
      <button class="pill active" data-m="qwen" type="button">Qwen (75%)</button>
      <button class="pill" data-m="llama31" type="button">Llama-3.1 (23%)</button>
      <button class="pill" data-m="llama2" type="button">Llama-2 (16%)</button>
      <button class="pill" data-m="mistral" type="button">Mistral (7%)</button>
    </div>
    <div class="sep"></div>
    <span class="bar-label" id="taskBarLabel">Task</span>
    <div class="bar-group" id="taskBar" role="group" aria-labelledby="taskBarLabel">
      <button class="pill active" data-t="gsm8k" type="button">GSM8K</button>
      <button class="pill" data-t="coqa" type="button">CoQA</button>
    </div>
  </header>
  <!-- Primary content. The literal numbers below are the Qwen / GSM8K values;
       the script overwrites every element that has an id on load and on each
       selection change. -->
  <main class="main">
    <div class="stat-card">
      <div class="stat-label">Net Accuracy Change</div>
      <div class="stat-value" id="netDelta">+57.2%</div>
      <div class="stat-sub" id="netSub">ZS 18.3% → CoT 75.4%</div>
    </div>
    <div class="stat-card">
      <div class="stat-label">Correct Answers Flipped</div>
      <div class="stat-value stat-negative" id="flipRate">9.5%</div>
      <div class="stat-sub" id="flipSub">23 of 241 ZS-correct → wrong under CoT</div>
    </div>
    <div class="meter-area">
      <div>
        <div class="meter-label"><span>Zero-Shot</span><span id="zsLbl">18.3%</span></div>
        <div class="meter"><div class="meter-fill" id="zsMeter" style="width:18.3%;background:#6b7080;">ZS</div></div>
      </div>
      <div>
        <div class="meter-label"><span>Chain-of-Thought</span><span id="cotLbl">75.4%</span></div>
        <div class="meter"><div class="meter-fill" id="cotMeter" style="width:75.4%;background:#4e79a7;">CoT</div></div>
      </div>
      <!-- Hidden by the script when the selected dataset has no SC@5 value. -->
      <div id="scRow">
        <div class="meter-label"><span>SC@5 (majority vote)</span><span id="scLbl">77.6%</span></div>
        <div class="meter"><div class="meter-fill" id="scMeter" style="width:77.6%;background:#4ade80;">SC@5</div></div>
      </div>
    </div>
    <!-- The script rebuilds this row (title plus one card per prompt strategy). -->
    <div class="prompt-row" id="promptRow">
      <div class="prompt-title">Prompt Strategy Comparison</div>
    </div>
  </main>
</div>
<script>
/*
 * Dashboard data, addressed as D[model][task] (model keys match the pills'
 * data-m values, task keys match data-t).
 *
 * Per-entry fields, as consumed by update():
 *   zs, cot   percentages shown on the Zero-Shot and Chain-of-Thought meters
 *   sc        percentage for the SC@5 meter; null hides that row
 *   delta     value shown in the "Net Accuracy Change" card (sign picks the colour)
 *   flipCorr  percentage shown in the "Correct Answers Flipped" card
 *   flipN     "flipped/total" text shown under that card
 *   prompts   one card per strategy: n = name, a = accuracy %, f = "C→I flips"
 *             count (f of 0 is rendered as "baseline")
 *
 * NOTE(review): for qwen/gsm8k and llama2/gsm8k the flipN numerator (23, 41)
 * differs from the CoT prompt's f (39, 46); every other entry agrees. Confirm
 * against the source results which figure is intended.
 */
const D = {
  qwen: {
    gsm8k: {
      zs: 18.3, cot: 75.4, sc: 77.6, delta: 57.2,
      flipCorr: 9.5, flipN: "23/241",
      prompts: [
        { n: "Zero-Shot", a: 18.3, f: 0 },
        { n: "CoT", a: 75.4, f: 39 },
        { n: "Brief", a: 71.4, f: 35 },
        { n: "Verify", a: 67.0, f: 35 }
      ]
    },
    coqa: {
      zs: 61.8, cot: 56.4, sc: null, delta: -5.4,
      flipCorr: 16.5, flipN: "51/309",
      prompts: [
        { n: "Zero-Shot", a: 61.8, f: 0 },
        { n: "CoT", a: 56.4, f: 51 }
      ]
    }
  },
  llama31: {
    gsm8k: {
      zs: 14.6, cot: 23.4, sc: 32.4, delta: 8.9,
      flipCorr: 68.2, flipN: "131/192",
      prompts: [
        { n: "Zero-Shot", a: 14.6, f: 0 },
        { n: "CoT", a: 23.4, f: 131 },
        { n: "Brief", a: 29.5, f: 106 },
        { n: "Verify", a: 13.9, f: 122 }
      ]
    },
    coqa: {
      zs: 57.0, cot: 36.6, sc: null, delta: -20.4,
      flipCorr: 46.0, flipN: "131/285",
      prompts: [
        { n: "Zero-Shot", a: 57.0, f: 0 },
        { n: "CoT", a: 36.6, f: 131 }
      ]
    }
  },
  llama2: {
    gsm8k: {
      zs: 4.2, cot: 15.5, sc: 15.8, delta: 11.3,
      flipCorr: 74.5, flipN: "41/55",
      prompts: [
        { n: "Zero-Shot", a: 4.2, f: 0 },
        { n: "CoT", a: 15.5, f: 46 },
        { n: "Brief", a: 19.3, f: 38 },
        { n: "Verify", a: 9.9, f: 42 }
      ]
    },
    coqa: {
      zs: 57.8, cot: 54.2, sc: null, delta: -3.6,
      flipCorr: 19.0, flipN: "55/289",
      prompts: [
        { n: "Zero-Shot", a: 57.8, f: 0 },
        { n: "CoT", a: 54.2, f: 55 }
      ]
    }
  },
  mistral: {
    gsm8k: {
      zs: 4.5, cot: 7.2, sc: 10.6, delta: 2.7,
      flipCorr: 88.1, flipN: "52/59",
      prompts: [
        { n: "Zero-Shot", a: 4.5, f: 0 },
        { n: "CoT", a: 7.2, f: 52 },
        { n: "Brief", a: 13.4, f: 33 },
        { n: "Verify", a: 0.9, f: 56 }
      ]
    },
    coqa: {
      zs: 56.0, cot: 29.2, sc: null, delta: -26.8,
      flipCorr: 58.6, flipN: "164/280",
      prompts: [
        { n: "Zero-Shot", a: 56.0, f: 0 },
        { n: "CoT", a: 29.2, f: 164 }
      ]
    }
  }
};

// Current selection. update() reads both on every render; the defaults match
// the pills that carry the "active" class in the markup.
let curModel = "qwen", curTask = "gsm8k";

/**
 * Wires one pill group as a single-select control.
 *
 * On start-up each pill's aria-pressed is set from its "active" class, so the
 * selected state is exposed to assistive technology and not only via styling.
 * On click the clicked pill becomes the only active / pressed one, `onSelect`
 * receives its dataset to record the choice, and the dashboard is re-rendered.
 *
 * @param {string} barId id of the element containing the .pill buttons
 * @param {(data: DOMStringMap) => void} onSelect stores the chosen value
 */
function bindPillGroup(barId, onSelect) {
  const pills = document.querySelectorAll(`#${barId} .pill`);

  const markSelected = chosen => {
    pills.forEach(pill => {
      const isChosen = pill === chosen;
      pill.classList.toggle("active", isChosen);
      pill.setAttribute("aria-pressed", String(isChosen));
    });
  };

  pills.forEach(pill => {
    pill.setAttribute("aria-pressed", String(pill.classList.contains("active")));
    pill.addEventListener("click", () => {
      markSelected(pill);
      onSelect(pill.dataset);
      update();
    });
  });
}

bindPillGroup("modelBar", data => { curModel = data.m; });
bindPillGroup("taskBar", data => { curTask = data.t; });

/**
 * Re-renders every data-driven part of the dashboard from D[curModel][curTask]:
 * the two stat cards, the Zero-Shot / CoT / SC@5 meters and the prompt cards.
 * Takes no arguments (reads the current selection from curModel / curTask) and
 * returns nothing; all output goes to the DOM.
 */
function update() {
  const d = D[curModel][curTask];
  const byId = id => document.getElementById(id);
  // Always one decimal, so values written as 57.0 render as "57.0%" and line
  // up with neighbours such as "36.6%".
  const pct = value => value.toFixed(1) + "%";
  // Meter fills never go below 3% wide — presumably so the label inside the
  // bar stays readable for very small values.
  const barWidth = value => Math.max(value, 3) + "%";
  const improved = d.delta > 0;

  // Net accuracy change card.
  const deltaEl = byId("netDelta");
  deltaEl.textContent = (improved ? "+" : "") + pct(d.delta);
  deltaEl.className = "stat-value " + (improved ? "stat-positive" : "stat-negative");
  byId("netSub").textContent = `ZS ${pct(d.zs)} → CoT ${pct(d.cot)}`;

  // Flipped-answers card; "23/241" is shown as "23 of 241", matching the
  // phrasing used by the static markup.
  byId("flipRate").textContent = pct(d.flipCorr);
  byId("flipSub").textContent = `${d.flipN.replace("/", " of ")} ZS-correct → wrong under CoT`;

  // Zero-shot and chain-of-thought meters; the CoT bar turns red when CoT did
  // not improve on zero-shot.
  byId("zsMeter").style.width = barWidth(d.zs);
  byId("zsLbl").textContent = pct(d.zs);
  const cotMeter = byId("cotMeter");
  cotMeter.style.width = barWidth(d.cot);
  cotMeter.style.background = improved ? "#4e79a7" : "#f87171";
  byId("cotLbl").textContent = pct(d.cot);

  // SC@5 row: hidden only when there is no value at all (null/undefined);
  // a genuine 0 is still displayed.
  const scRow = byId("scRow");
  if (d.sc != null) {
    scRow.style.display = "";
    byId("scMeter").style.width = barWidth(d.sc);
    byId("scLbl").textContent = pct(d.sc);
  } else {
    scRow.style.display = "none";
  }

  // Prompt cards: rebuilt from scratch, one equal-width column per strategy.
  // Values are interpolated into HTML, so D must remain trusted, in-file data.
  const row = byId("promptRow");
  row.innerHTML = '<div class="prompt-title">Prompt Strategy Comparison</div>';
  row.style.gridTemplateColumns = `repeat(${d.prompts.length},1fr)`;
  const bestAcc = Math.max(...d.prompts.map(p => p.a));
  d.prompts.forEach(p => {
    const isBest = p.a === bestAcc;
    const accColor = isBest ? "#4ade80" : (p.f > 0 ? "#c8cad4" : "#8b8fa3");
    const flipsText = p.f > 0 ? p.f + " C→I flips" : "baseline";
    const bestTag = isBest ? '<span class="best-tag">BEST</span>' : '';
    const card = document.createElement("div");
    card.className = "prompt-card";
    card.innerHTML = `<div class="p-name">${p.n}</div><div class="p-acc" style="color:${accColor}">${pct(p.a)}</div><div class="p-flips">${flipsText}</div>${bestTag}`;
    row.appendChild(card);
  });
}

// Initial render: replaces the placeholder values in the markup with the data
// for the default selection (curModel / curTask).
update();
</script>
</body>
</html>
