<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Flip Rate vs Model Capability</title>
<style>
  /* Page shell: the document fills the viewport exactly and never scrolls. */
  html,
  body {
    margin: 0;
    padding: 0;
    overflow: hidden;
    width: 100%;
    height: 100%;
    font-family: 'Georgia', serif;
    background: #fafafa;
  }

  /* The chart SVG stretches over the whole page. */
  svg {
    display: block;
    width: 100%;
    height: 100%;
  }

  /* Axis tick labels, axis captions and the title text style. */
  .axis text {
    font-size: 13px;
    fill: #333;
  }
  .axis-label {
    font-size: 14px;
    font-weight: 600;
    fill: #222;
  }
  .title {
    font-size: 16px;
    font-weight: 700;
    fill: #111;
  }

  /* Floating hover card; pointer-events is off so it never intercepts the mouse. */
  .tooltip {
    position: absolute;
    background: rgba(255,255,255,0.96);
    border: 1px solid #ccc;
    border-radius: 6px;
    padding: 10px 14px;
    font-size: 13px;
    line-height: 1.5;
    pointer-events: none;
    box-shadow: 0 2px 8px rgba(0,0,0,0.12);
    max-width: 260px;
  }
  .tooltip .model-name {
    font-weight: 700;
    font-size: 14px;
    margin-bottom: 4px;
  }
  .tooltip .metric {
    color: #555;
  }
  .tooltip .value {
    font-weight: 600;
    color: #111;
  }

  /* Dashed trend line and small annotation text. */
  .regression-line {
    stroke: #999;
    stroke-width: 1.5;
    stroke-dasharray: 6,4;
  }
  .legend-box {
    font-size: 12px;
    fill: #555;
  }
</style>
</head>
<body>
<!-- Hover tooltip: the chart script fills it with HTML and toggles its display. -->
<div class="tooltip" id="tip" style="display:none;"></div>
<!-- Chart surface. The script removes and rebuilds every child on each draw, so the
     accessible name is carried by an attribute instead of a <title> child element. -->
<svg id="chart" role="img" aria-label="Scatter plot of C→I flip rate against CoT accuracy for four language models, with GSM8K and CoQA series"></svg>
<script src="https://d3js.org/d3.v7.min.js"></script>
<script>
// One record per model. All numeric fields are shown to the user with a "%" suffix.
//   model    - display name (also used to pick label offsets in draw())
//   color    - fill colour for that model's marks and label
//   zs       - value shown as "ZS Accuracy" in the tooltip
//   cot      - value shown as "CoT Accuracy"; plotted on the x axis
//   flipGSM  - GSM8K flip rate; y position of the circle
//   flipCoQA - CoQA flip rate; y position of the diamond
const data = [
  {
    model: "Qwen2.5-7B",
    color: "#4e79a7",
    zs: 18.3,
    cot: 75.4,
    flipGSM: 16.2,
    flipCoQA: 16.5
  },
  {
    model: "Llama-3.1-8B",
    color: "#e15759",
    zs: 14.6,
    cot: 23.4,
    flipGSM: 68.2,
    flipCoQA: 46.0
  },
  {
    model: "Llama-2-7B",
    color: "#f28e2b",
    zs: 4.2,
    cot: 15.5,
    flipGSM: 83.6,
    flipCoQA: 19.0
  },
  {
    model: "Mistral-7B",
    color: "#76b7b2",
    zs: 4.5,
    cot: 7.2,
    flipGSM: 88.1,
    flipCoQA: 58.6
  }
];

// D3 selections for the chart SVG and the tooltip div, looked up once at load.
const svg = d3.select("#chart"),
      tip = d3.select("#tip");
// Viewport width and height in pixels; draw() reassigns both on every render.
let W,
    H;

/**
 * Rebuilds the whole chart inside the #chart SVG at the current window size.
 *
 * Reads the module-level `data`, `svg` and `tip`, and overwrites the
 * module-level `W` and `H` with the viewport size. Everything previously drawn
 * is removed first, so the function can be called repeatedly (it is also the
 * window resize handler). Takes no arguments and returns nothing.
 */
function draw() {
  W = window.innerWidth; H = window.innerHeight;
  const m = { top: 50, right: 30, bottom: 60, left: 65 };
  // Clamp so a viewport smaller than the margins cannot produce a negative plot size.
  const w = Math.max(0, W - m.left - m.right);
  const h = Math.max(0, H - m.top - m.bottom);
  svg.selectAll("*").remove();
  const g = svg.append("g").attr("transform", `translate(${m.left},${m.top})`);

  const x = d3.scaleLinear().domain([0, 85]).range([0, w]);
  const y = d3.scaleLinear().domain([0, 100]).range([h, 0]);
  // Read the bounds back from the scale so the trend line follows any domain change.
  const [xMin, xMax] = x.domain();

  // Tooltip helpers shared by both series.
  // The tooltip opens away from the nearest viewport edge: the page is
  // overflow:hidden, so a box that always grows right/down from the cursor gets
  // squeezed or clipped for marks in the right or lower half of the window.
  const moveTip = e => {
    const flipX = e.pageX > W / 2;
    const flipY = e.pageY > H / 2;
    tip.style("left", flipX ? null : (e.pageX + 15) + "px")
      .style("right", flipX ? (W - e.pageX + 15) + "px" : null)
      .style("top", flipY ? null : (e.pageY - 10) + "px")
      .style("bottom", flipY ? (H - e.pageY - 10) + "px" : null);
  };
  // Fill and reveal the tooltip, positioning it immediately so it never
  // appears at the location left over from the previous hover.
  const showTip = (e, html) => {
    tip.style("display","block").html(html);
    moveTip(e);
  };
  const hideTip = () => tip.style("display","none");

  // Axes
  g.append("g").attr("class","axis").attr("transform",`translate(0,${h})`).call(d3.axisBottom(x).ticks(6).tickFormat(d=>d+"%"));
  g.append("g").attr("class","axis").call(d3.axisLeft(y).ticks(6).tickFormat(d=>d+"%"));
  g.append("text").attr("class","axis-label").attr("x",w/2).attr("y",h+45).attr("text-anchor","middle").text("CoT Accuracy (Model Capability)");
  g.append("text").attr("class","axis-label").attr("transform","rotate(-90)").attr("x",-h/2).attr("y",-50).attr("text-anchor","middle").text("C→I Flip Rate (% of ZS-correct flipped)");

  // Regression line for GSM8K: ordinary least-squares fit of flipGSM on cot.
  const xs = data.map(d=>d.cot), ys = data.map(d=>d.flipGSM);
  const mx = d3.mean(xs), my = d3.mean(ys);
  const slope = d3.sum(xs.map((xi,i)=>(xi-mx)*(ys[i]-my))) / d3.sum(xs.map(xi=>(xi-mx)**2));
  const intercept = my - slope*mx;
  const fit = v => slope*v + intercept;
  g.append("line").attr("class","regression-line")
    .attr("x1",x(xMin)).attr("y1",y(fit(xMin)))
    .attr("x2",x(xMax)).attr("y2",y(fit(xMax)));
  // NOTE(review): this label is a fixed string, not computed from `data`.
  // Pearson r of the four plotted (cot, flipGSM) points comes out at about
  // -0.996 by hand calculation; confirm which figure is intended.
  g.append("text").attr("class","legend-box").attr("x",w-10).attr("y",y(fit(xMax))-8)
    .attr("text-anchor","end").text("r = −0.98");

  // GSM8K points (circles)
  g.selectAll(".dot-gsm").data(data).enter().append("circle")
    .attr("class","dot-gsm") // give the marks the class the join selects on
    .attr("cx", d=>x(d.cot)).attr("cy", d=>y(d.flipGSM)).attr("r", 10)
    .attr("fill", d=>d.color).attr("stroke","#fff").attr("stroke-width",2)
    .style("cursor","pointer")
    .on("mouseover", (e,d) => {
      showTip(e,
        `<div class="model-name" style="color:${d.color}">${d.model}</div>
         <span class="metric">ZS Accuracy:</span> <span class="value">${d.zs}%</span><br>
         <span class="metric">CoT Accuracy:</span> <span class="value">${d.cot}%</span><br>
         <span class="metric">GSM8K Flip Rate:</span> <span class="value">${d.flipGSM}%</span><br>
         <span class="metric">CoQA Flip Rate:</span> <span class="value">${d.flipCoQA}%</span>`
      );
    })
    .on("mousemove", moveTip)
    .on("mouseout", hideTip);

  // CoQA points (diamonds), drawn at the same x as the model's GSM8K circle
  g.selectAll(".dot-coqa").data(data).enter().append("path")
    .attr("class","dot-coqa") // give the marks the class the join selects on
    .attr("d", d3.symbol().type(d3.symbolDiamond).size(180))
    .attr("transform", d=>`translate(${x(d.cot)},${y(d.flipCoQA)})`)
    .attr("fill", d=>d.color).attr("stroke","#fff").attr("stroke-width",1.5).attr("opacity",0.7)
    .style("cursor","pointer")
    .on("mouseover", (e,d) => {
      showTip(e,
        `<div class="model-name" style="color:${d.color}">${d.model} (CoQA)</div>
         <span class="metric">CoQA Flip Rate:</span> <span class="value">${d.flipCoQA}%</span><br>
         <span class="metric">vs GSM8K:</span> <span class="value">${d.flipGSM}%</span>`
      );
    })
    .on("mousemove", moveTip)
    .on("mouseout", hideTip);

  // Labels, anchored to each model's GSM8K circle
  data.forEach(d => {
    // Mistral's label goes to the left of its point; every other label goes right.
    const leftSide = d.model==="Mistral-7B";
    const ox = leftSide ? -14 : 14;
    // Llama-2's label is raised above its point; the rest sit level with theirs.
    const oy = d.model==="Llama-2-7B" ? -14 : 4;
    g.append("text").attr("x",x(d.cot)+ox).attr("y",y(d.flipGSM)+oy)
      .attr("font-size","12px").attr("fill",d.color).attr("font-weight",600)
      .attr("text-anchor", leftSide?"end":"start")
      // Shorten the name: drop the size suffix and Qwen's "2.5" version tag.
      .text(d.model.replace("-7B","").replace("2.5","").replace("-8B",""));
  });

  // Legend
  const leg = g.append("g").attr("transform",`translate(${w-160},10)`);
  leg.append("circle").attr("r",6).attr("cx",0).attr("cy",0).attr("fill","#888");
  leg.append("text").attr("x",12).attr("y",4).attr("font-size","12px").attr("fill","#555").text("GSM8K (Math)");
  leg.append("path").attr("d",d3.symbol().type(d3.symbolDiamond).size(120)).attr("transform","translate(0,22)").attr("fill","#888").attr("opacity",0.7);
  leg.append("text").attr("x",12).attr("y",26).attr("font-size","12px").attr("fill","#555").text("CoQA (Text QA)");
}

// Re-render whenever the window is resized, and render once right away.
window.addEventListener("resize", draw);
draw();
</script>
</body>
</html>
