<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>The Feynman Trap: Flip Explorer</title>
<style>
  @import url('https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:wght@400;600&family=Inter:wght@400;600;700&display=swap');
  /* Page shell: full-viewport dark canvas; page-level scrolling is disabled here. */
  html, body { margin: 0; padding: 0; width: 100%; height: 100%; font-family: 'Inter', sans-serif; background: #0f1117; color: #e0e0e0; overflow: hidden; }
  .container { width: 100%; height: 100%; display: flex; flex-direction: column; }

  /* Header bar: title on the left, counter and prev/next buttons on the right. */
  .header { padding: 12px 20px; display: flex; justify-content: space-between; align-items: center; border-bottom: 1px solid #2a2d3a; }
  .header h3 { margin: 0; font-size: 14px; color: #8b8fa3; font-weight: 600; }
  .nav-btns { display: flex; gap: 8px; }
  .nav-btn { background: #1e2030; border: 1px solid #2a2d3a; color: #8b8fa3; padding: 6px 16px; border-radius: 6px; cursor: pointer; font-size: 12px; font-family: 'Inter', sans-serif; transition: all 0.2s; }
  .nav-btn:hover { background: #2a2d3a; color: #fff; }
  /* .active is not applied by the markup or script in this file. */
  .nav-btn.active { background: #3b3f54; color: #fff; border-color: #4e79a7; }

  /* Card: question box on top, two answer panels filling the remaining height. */
  .card-area { flex: 1; display: flex; flex-direction: column; padding: 16px 24px; overflow: hidden; }
  .question-box { background: #1a1c2e; border-radius: 10px; padding: 14px 18px; margin-bottom: 12px; border-left: 4px solid #4e79a7; }
  .question-label { font-size: 11px; color: #6b7080; text-transform: uppercase; letter-spacing: 1px; margin-bottom: 6px; }
  .question-text { font-size: 14px; line-height: 1.5; color: #c8cad4; }
  /* min-height: 0 lets the row shrink inside the flex column so .reasoning-box can scroll. */
  .answers-row { display: flex; gap: 12px; flex: 1; min-height: 0; }
  .answer-panel { flex: 1; border-radius: 10px; padding: 14px 18px; display: flex; flex-direction: column; overflow: hidden; transition: all 0.5s ease; }

  /* Green palette for the zero-shot panel, red palette for the chain-of-thought panel. */
  .zs-panel { background: #0d2818; border: 1px solid #1a5c2e; }
  .cot-panel { background: #2a1015; border: 1px solid #5c1a22; }
  .panel-header { display: flex; justify-content: space-between; align-items: center; margin-bottom: 8px; }
  .panel-tag { font-size: 11px; text-transform: uppercase; letter-spacing: 1px; padding: 3px 10px; border-radius: 12px; font-weight: 600; }
  .zs-tag { background: #1a5c2e; color: #4ade80; }
  .cot-tag { background: #5c1a22; color: #f87171; }
  .answer-value { font-size: 28px; font-weight: 700; font-family: 'IBM Plex Mono', monospace; }
  .zs-answer { color: #4ade80; }
  .cot-answer { color: #f87171; }
  .gt-note { font-size: 11px; color: #6b7080; margin-top: 4px; }

  /* Scrollable reasoning transcript; .error-step marks the highlighted faulty step. */
  .reasoning-box { flex: 1; overflow-y: auto; margin-top: 8px; font-family: 'IBM Plex Mono', monospace; font-size: 12px; line-height: 1.6; color: #9b9fb0; padding: 10px; background: #1a0d10; border-radius: 6px; }
  .reasoning-box .error-step { color: #f87171; font-weight: 600; background: rgba(248,113,113,0.1); padding: 2px 4px; border-radius: 3px; }
  .verdict-row { display: flex; align-items: center; gap: 8px; margin-top: 6px; }
  .verdict-icon { font-size: 20px; }
  /* Badge background and text colour are set inline by the script per model. */
  .model-badge { font-size: 11px; padding: 3px 10px; border-radius: 12px; font-weight: 600; }
  .counter { font-size: 12px; color: #6b7080; }
  .zs-direct { font-size: 12px; color: #6b7080; margin-top: 2px; font-style: italic; }

  /* Thin scrollbar for the reasoning transcript (WebKit-prefixed pseudo-elements). */
  .reasoning-box::-webkit-scrollbar { width: 4px; }
  .reasoning-box::-webkit-scrollbar-track { background: transparent; }
  .reasoning-box::-webkit-scrollbar-thumb { background: #3b3f54; border-radius: 2px; }

  /* Typewriter caret styles; not referenced by the markup or script in this file. */
  .typewriter { overflow: hidden; border-right: 2px solid #f87171; white-space: pre-wrap; }
  @keyframes blink { 50% { border-color: transparent; } }
  .typing { animation: blink 0.8s step-end infinite; }

  /* Narrow viewports: two side-by-side panels become too cramped, and because
     html/body hide overflow any clipped content would be unreachable. Stack the
     panels at their natural height and let the card area itself scroll. */
  @media (max-width: 700px) {
    .card-area { overflow-y: auto; }
    .answers-row { flex: none; flex-direction: column; }
    .answer-panel { flex: none; }
    .reasoning-box { flex: none; max-height: 50vh; }
  }
</style>
</head>
<body>
<div class="container">
  <!-- Header bar: title, position counter, and previous/next controls. -->
  <header class="header">
    <h3>THE FEYNMAN TRAP — Real Flip Examples</h3>
    <div style="display:flex;align-items:center;gap:12px;">
      <!-- The script rewrites this text on every card change; aria-live announces the new position. -->
      <span class="counter" id="counter" aria-live="polite">1 / 8</span>
      <div class="nav-btns">
        <button class="nav-btn" id="prevBtn" type="button" aria-label="Previous example" onclick="prev()">← Prev</button>
        <button class="nav-btn" id="nextBtn" type="button" aria-label="Next example" onclick="next()">Next →</button>
      </div>
    </div>
  </header>
  <!-- Card: the question on top, then the zero-shot and chain-of-thought answers side by side. -->
  <main class="card-area">
    <div class="question-box">
      <div class="question-label">Question</div>
      <div class="question-text" id="questionText"></div>
    </div>
    <div class="answers-row">
      <div class="answer-panel zs-panel">
        <div class="panel-header">
          <span class="panel-tag zs-tag">Zero-Shot</span>
          <!-- The glyph alone has no reliable spoken form, so it is exposed as a labelled image. -->
          <span class="verdict-icon" role="img" aria-label="Correct">✓</span>
        </div>
        <div class="answer-value zs-answer" id="zsAnswer"></div>
        <div class="gt-note" id="gtNote"></div>
        <div class="zs-direct">Direct answer — no reasoning needed</div>
      </div>
      <div class="answer-panel cot-panel">
        <div class="panel-header">
          <span class="panel-tag cot-tag">Chain-of-Thought</span>
          <span class="verdict-icon" role="img" aria-label="Incorrect">✗</span>
        </div>
        <div class="answer-value cot-answer" id="cotAnswer"></div>
        <div class="verdict-row">
          <span class="model-badge" id="modelBadge"></span>
        </div>
        <!-- Filled via innerHTML by the script; content may contain <span class="error-step"> and <br>. -->
        <div class="reasoning-box" id="reasoning"></div>
      </div>
    </div>
  </main>
</div>
<script>
// Example cards rendered by show(). Each entry has:
//   model     - label shown in the badge on the chain-of-thought panel
//   color     - hex colour for that badge (show() appends '33' for the background)
//   q         - question text (inserted as plain text)
//   gt        - ground-truth answer shown under the zero-shot answer
//   zs / cot  - zero-shot and chain-of-thought answers as displayed
//   reasoning - chain-of-thought transcript plus an editorial "⚠ Error" note;
//               inserted as HTML, so it may contain <span class='error-step'>
//               and uses \n for line breaks.
const flips = [
  { model: "Llama-3.1", color: "#e15759",
    q: "Kyle bought last year's best-selling book for $19.50. This is with a 25% discount from the original price. What was the original price of the book?",
    gt: "26", zs: "26", cot: "78",
    reasoning: "1. Kyle bought the book for $19.50 with a 25% discount.\n2. 25% of the original price is $19.50.\n3. 0.25 × original price = $19.50\n4. Original price = $19.50 ÷ 0.25\n5. Original price = <span class='error-step'>$78</span>\n\n⚠ Error: The model confused '25% discount' with '25% OF the price'. The correct calculation: $19.50 = 75% of original → $19.50 ÷ 0.75 = $26"
  },
  { model: "Llama-3.1", color: "#e15759",
    q: "John takes care of 10 dogs. Each dog takes .5 hours a day to walk and take care of their business. How many hours a week does he spend taking care of dogs?",
    gt: "35", zs: "35", cot: "16,800",
    reasoning: "10 dogs × 0.5 hours/day = 5 hours/day\n5 hours/day × 7 days/week = 35 hours/week\n35 hours/week × 4 weeks/month = 140 hours/month\n140 hours/month × 12 months/year = 1,680 hours/year\n<span class='error-step'>1,680 × 10 years = 16,800 hours</span>\n\n⚠ Error: The model correctly computed the weekly answer (35) but kept computing unnecessarily — months, years, decades — and returned the decade figure."
  },
  // The per-box pizza price is part of the question so the 8.5x term in the
  // transcript is traceable; the highlighted slip is the sum 12 + 15 + 6 = 33
  // being written as 41.
  { model: "Llama-3.1", color: "#e15759",
    q: "Marie ordered one chicken meal ($12), 5 packs of milk ($3 each), 4 apples ($1.50 each), and some boxes of pizza ($8.50 each). She paid $50 total. How many boxes of pizza did she order?",
    gt: "2", zs: "2", cot: "1.06",
    reasoning: "1. 12 + 3×5 + 1.5×4 + 8.5×x = 50\n2. 12 + 15 + 6 + 8.5x = 50\n3. <span class='error-step'>41 + 8.5x = 50</span>\n4. 8.5x = 9\n5. x = 1.06\n\n⚠ Error: The model set up the right equation but added 12 + 15 + 6 as 41 instead of 33. Correct: 33 + 8.5x = 50 → 8.5x = 17 → x = 2 boxes."
  },
  // NOTE(review): 230 ÷ 2 is 115, not 155; presumably this is the model's own
  // arithmetic as recorded. Confirm against the raw output.
  { model: "Llama-2", color: "#f28e2b",
    q: "Two trains leave San Rafael at the same time. They travel westward for 80 miles, then northwards for 150 miles. What's the distance covered by each train in the two days?",
    gt: "230", zs: "230", cot: "155",
    reasoning: "Day 1:\n1. Trains travel westward for 80 miles.\n2. Total distance covered on Day 1 = 80 miles.\n\nDay 2:\n1. Trains travel northward for 150 miles.\n2. Total distance covered on Day 2 = 150 miles.\n\n<span class='error-step'>Distance per train = (80 + 150) ÷ 2 = 155 miles</span>\n\n⚠ Error: The model divided by 2 (for two trains) but the question asks for distance per train. Each train travels the full 80 + 150 = 230 miles."
  },
  // NOTE(review): cot is "30" but the transcript below ends at $24.00.
  // Confirm which value the model actually returned.
  { model: "Llama-2", color: "#f28e2b",
    q: "A gas station sells gas for $3.00/gallon. An app offers $0.20 cashback per gallon. If someone buys 10 gallons, how much will their gas cost after the cashback?",
    gt: "28", zs: "28", cot: "30",
    reasoning: "Step 1: Cost of 10 gallons without cashback:\n10 × $3.00 = $30.00\n\nStep 2: Calculate cashback:\n<span class='error-step'>$30.00 × $0.20 = $6.00</span>\n\nStep 3: Final cost:\n$30.00 - $6.00 = $24.00\n\n⚠ Error: The model multiplied $30 × $0.20 instead of 10 gallons × $0.20. Correct: 10 × $0.20 = $2.00 cashback, so $30 - $2 = $28."
  },
  { model: "Llama-2", color: "#f28e2b",
    q: "Nissa hires 60 seasonal elves. A third quit after children vomit on them, then 10 more quit after kids kick their shins. How many elves are left?",
    gt: "30", zs: "30", cot: "10 (then praised itself: 'Great job!')",
    reasoning: "Step 1: A third of elves quit.\n60 × 1/3 = 20 elves quit\n\nStep 2: 10 of the remaining quit.\n<span class='error-step'>20 - 10 = 10 elves left</span>\n\n'Your answer is correct! Great job!'\n\n⚠ Error: After 20 quit, 40 remain (not 20). Then 10 more quit → 30 left. The model subtracted from the quitters instead of the remainders, then congratulated itself on the wrong answer."
  },
  { model: "Mistral", color: "#76b7b2",
    q: "James decides to run 3 sprints 3 times a week. He runs 60 meters each sprint. How many total meters does he run a week?",
    gt: "540", zs: "540", cot: "Unparseable output",
    reasoning: "James runs 3 sprints, each 60 meters...\n3 sprints × 60 meters = 180 meters per session\n180 meters × 3 sessions per week = 540 meters per week\n<span class='error-step'>[output continued with repetitive text and no clear #### answer marker]</span>\n\n⚠ Error: The model actually computed the correct answer (540) in its reasoning but failed to output it in the expected format. The extraction pipeline couldn't parse the final answer."
  },
  { model: "Mistral", color: "#76b7b2",
    q: "A merchant wants to make a choice of purchase between 2 purchase plans: jewelry worth $5,000 or electronic gadgets worth $8,000. His financial advisor advises him to invest 5/8 of his savings. If the merchant has $640,000 in savings, how much will he remain with after the purchase?",
    gt: "240000", zs: "240000", cot: "395000",
    reasoning: "The merchant should invest 5/8 of savings.\n5/8 × $640,000 = $400,000 to invest.\n\nHe buys electronics worth $8,000.\n<span class='error-step'>Remaining = $640,000 - $400,000 + $8,000 - $5,000 ??? = $395,000</span>\n\n⚠ Error: Confused the investment amount with the purchase. Correct: He invests $400,000 and remains with $640,000 - $400,000 = $240,000."
  }
];

// Position in `flips` of the card currently shown; next() and prev() update it
// before calling show().
let idx = 0;

/**
 * Render the example at position `i` of `flips` into the card.
 *
 * Fills the question, both answers, the ground-truth note, the model badge
 * and the "n / total" counter, and replaces the reasoning transcript.
 *
 * @param {number} i - Zero-based index into `flips`.
 */
function show(i) {
  const f = flips[i];
  const reasoningEl = document.getElementById('reasoning');
  const badgeEl = document.getElementById('modelBadge');

  document.getElementById('questionText').textContent = f.q;
  document.getElementById('zsAnswer').textContent = f.zs;
  document.getElementById('cotAnswer').textContent = f.cot;
  document.getElementById('gtNote').textContent = `Correct answer: ${f.gt}`;

  // The reasoning strings carry their own <span class='error-step'> markup, so
  // they are inserted as HTML; newlines become <br> for display.
  reasoningEl.innerHTML = f.reasoning.replace(/\n/g, '<br>');
  // Replacing the content does not reset the box's scroll offset, so without
  // this a new card could open part-way down its transcript.
  reasoningEl.scrollTop = 0;

  badgeEl.textContent = f.model;
  // Appending '33' to the #rrggbb colour gives an 8-digit hex with ~20% alpha.
  badgeEl.style.background = f.color + '33';
  badgeEl.style.color = f.color;

  document.getElementById('counter').textContent = `${i+1} / ${flips.length}`;
}

// Step forward one card, wrapping from the last entry back to the first.
function next() {
  const following = idx + 1;
  idx = following % flips.length;
  show(idx);
}
// Step back one card, wrapping from the first entry around to the last.
function prev() {
  const total = flips.length;
  // Adding `total` before the modulo keeps the result non-negative at idx 0.
  idx = (idx + total - 1) % total;
  show(idx);
}

// Keyboard navigation: right arrow advances, left arrow goes back.
document.addEventListener('keydown', (event) => {
  switch (event.key) {
    case 'ArrowRight':
      next();
      break;
    case 'ArrowLeft':
      prev();
      break;
    default:
      break;
  }
});

// Render the first card once the script has loaded.
show(0);
</script>
</body>
</html>
