<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Error Taxonomy</title>
<style>
  html, body { margin: 0; padding: 0; overflow: hidden; width: 100%; height: 100%; font-family: 'Georgia', serif; background: #fafafa; }
  svg { display: block; width: 100%; height: 100%; }
  .tooltip { position: absolute; background: rgba(255,255,255,0.96); border: 1px solid #ccc; border-radius: 6px; padding: 10px 14px; font-size: 13px; line-height: 1.5; pointer-events: none; box-shadow: 0 2px 8px rgba(0,0,0,0.12); }
</style>
</head>
<body>
<div class="tooltip" id="tip" style="display:none;"></div>
<!-- Chart root. draw() clears and rebuilds all children on every render, so the accessible name is set on the element itself rather than in a child <title>. -->
<svg id="chart" role="img" aria-label="Error taxonomy: stacked bar chart of error categories as a percentage of C→I flips for each model"></svg>
<script src="https://d3js.org/d3.v7.min.js"></script>
<script>
// Per-model error counts, one row per model. Column order of each row:
//   model, total, extraction, arithmetic, grabbed, magnitude, malformed, near
// `total` equals the sum of the six category counts and is the denominator
// used when converting counts to bar percentages.
const data = [
  ["Qwen2.5-7B",    39, 16, 14,  4,  2,  2, 1],
  ["Llama-3.1-8B", 131,  0, 30, 46, 27, 23, 5],
  ["Llama-2-7B",    46,  5, 18,  7,  8,  7, 1],
  ["Mistral-7B",    52,  0, 11, 12,  7, 21, 1]
].map(([model, total, extraction, arithmetic, grabbed, magnitude, malformed, near]) => ({
  model, total, extraction, arithmetic, grabbed, magnitude, malformed, near
}));

// Error categories in stacking order (first entry sits at the bottom of each bar).
//   key   – name of the matching count field on each `data` row
//   label – text shown in the legend and tooltip
//   color – fill used for the bar segment and legend swatch
//   desc  – one-line explanation shown in the tooltip
const categories = [
  {
    key: "extraction",
    label: "Extraction Failure",
    color: "#bbb",
    desc: "Model correct, pipeline error"
  },
  {
    key: "arithmetic",
    label: "Arithmetic Error",
    color: "#e15759",
    desc: "Correct setup, wrong computation"
  },
  {
    key: "grabbed",
    label: "Grabbed Intermediate",
    color: "#f28e2b",
    desc: "Copied number from problem text"
  },
  {
    key: "magnitude",
    label: "Magnitude Error",
    color: "#edc948",
    desc: "Answer off by ≥5×"
  },
  {
    key: "malformed",
    label: "Malformed Output",
    color: "#76b7b2",
    desc: "Unparseable CoT output"
  },
  {
    key: "near",
    label: "Near Miss",
    color: "#59a14f",
    desc: "Within 5% of correct"
  }
];

// D3 selections for the two page elements the chart uses: the <svg id="chart">
// drawing surface and the <div id="tip"> hover tooltip.
const [svg, tip] = ["#chart", "#tip"].map(selector => d3.select(selector));

/**
 * Render the 100%-stacked bar chart into the #chart SVG, sized to the current
 * viewport.
 *
 * Everything previously drawn is removed and rebuilt: axes, one stacked bar per
 * entry of `data` (segments in `categories` order, bottom to top), the "n="
 * total above each bar, and a 2×3 legend below the x axis. Takes no arguments
 * and returns nothing; it is safe to call repeatedly and is also registered as
 * the window "resize" handler.
 */
function draw() {
  const W = window.innerWidth, H = window.innerHeight;
  const m = { top: 40, right: 20, bottom: 90, left: 55 };
  // Clamp to zero: on a viewport smaller than the margins a negative extent
  // would invert the scales and yield negative <rect> heights, which SVG rejects.
  const w = Math.max(0, W - m.left - m.right), h = Math.max(0, H - m.top - m.bottom);
  svg.selectAll("*").remove();
  const g = svg.append("g").attr("transform", `translate(${m.left},${m.top})`);

  const x = d3.scaleBand().domain(data.map(d=>d.model)).range([0, w]).padding(0.25);
  const y = d3.scaleLinear().domain([0, 100]).range([h, 0]);

  g.append("g").attr("transform",`translate(0,${h})`).call(d3.axisBottom(x)).selectAll("text").attr("font-size","12px");
  g.append("g").call(d3.axisLeft(y).ticks(5).tickFormat(d=>d+"%"));
  g.append("text").attr("transform","rotate(-90)").attr("x",-h/2).attr("y",-40).attr("text-anchor","middle").attr("font-size","13px").attr("fill","#333").attr("font-weight",600).text("Proportion of C→I Flips");

  // Place the tooltip beside the pointer while keeping it inside the viewport.
  // The body is overflow:hidden, so anything past the right/bottom edge would
  // simply be clipped.
  function placeTip(e) {
    const node = tip.node();
    if (!node) return;
    // Measure at the origin: an absolutely positioned box shrinks to the space
    // to the right of its `left`, so measuring in place near the right edge
    // would report a squeezed width.
    tip.style("left", "0px").style("top", "0px");
    const tw = node.offsetWidth, th = node.offsetHeight;
    let left = e.pageX + 12, top = e.pageY - 10;
    if (left + tw > W) left = e.pageX - 12 - tw; // flip to the left of the pointer
    if (top + th > H) top = H - th;              // slide up to stay on screen
    tip.style("left", Math.max(0, left) + "px").style("top", Math.max(0, top) + "px");
  }

  // Stacked bars (percentage)
  data.forEach(d => {
    let cumY = 0; // running top of the stack for this model, in percent
    categories.forEach(cat => {
      // A row with total 0 would give NaN here; draw it as an empty segment instead.
      const pct = d.total > 0 ? (d[cat.key] / d.total) * 100 : 0;
      const barY = cumY;
      cumY += pct;

      g.append("rect")
        .attr("x", x(d.model)).attr("y", y(barY + pct)).attr("width", x.bandwidth()).attr("height", y(barY) - y(barY + pct))
        .attr("fill", cat.color).attr("stroke", "#fff").attr("stroke-width", 1)
        .style("cursor","pointer")
        .on("mouseover", e => {
          // Tooltip content is built only from the constants in `data`/`categories`.
          tip.style("display","block").html(
            `<b>${d.model}</b><br>
             <b style="color:${cat.color}">● ${cat.label}</b><br>
             Count: ${d[cat.key]} / ${d.total} (${pct.toFixed(1)}%)<br>
             <i>${cat.desc}</i>`
          );
          // Position immediately so the tooltip never appears at a stale location.
          placeTip(e);
        })
        .on("mousemove", placeTip)
        .on("mouseout", () => tip.style("display","none"));

      // Label inside bar if tall enough
      if (pct > 8) {
        g.append("text")
          .attr("x", x(d.model) + x.bandwidth()/2)
          .attr("y", y(barY + pct/2) + 4)
          .attr("text-anchor","middle").attr("font-size","11px").attr("fill","#fff").attr("font-weight",600)
          .attr("pointer-events","none") // let hover events reach the rect underneath
          .text(pct.toFixed(0)+"%");
      }
    });
  });

  // Total count on top
  data.forEach(d => {
    g.append("text")
      .attr("x", x(d.model) + x.bandwidth()/2).attr("y", y(100) - 6)
      .attr("text-anchor","middle").attr("font-size","12px").attr("fill","#333").attr("font-weight",600)
      .text(`n=${d.total}`);
  });

  // Legend — 2 rows × 3 columns, laid out in the bottom margin below the x axis
  const cols = 3;
  const colW = w / cols;
  const rowH = 20;
  const legendY0 = h + 32;
  categories.forEach((cat, i) => {
    const row = Math.floor(i / cols);
    const col = i % cols;
    const lx = col * colW + 8;
    const ly = legendY0 + row * rowH;
    g.append("rect").attr("x",lx).attr("y",ly).attr("width",12).attr("height",12).attr("fill",cat.color).attr("rx",2);
    g.append("text").attr("x",lx+18).attr("y",ly+10).attr("font-size","11px").attr("fill","#555").text(cat.label);
  });
}

// Initial render, then rebuild the whole chart on every window resize so it
// keeps filling the viewport (draw() re-reads window.innerWidth/innerHeight on each call).
draw();
window.addEventListener("resize", draw);
</script>
</body>
</html>
