<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <!-- Without a viewport declaration mobile browsers lay the page out on a
       wide virtual canvas, so the wrapping flex panels below never reflow. -->
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <!-- The method name is written out here; the original text carried an
       unexpanded LaTeX macro that browsers show verbatim in the tab title. -->
  <title>Teaser Figure for Length Controlled Policy Optimization (LCPO)</title>
  <style>
    /* Page chrome: white background, sans-serif text, uniform outer margin. */
    body {
      margin: 20px;
      background: #ffffff;
      font-family: Arial, sans-serif;
    }

    /* Outer frame of the figure: a wrapping flex row holding the panels. */
    .figure-container {
      box-sizing: border-box;
      width: 100%;
      padding: 20px;
      border: 1px solid #cccccc;
      display: flex;
      flex-wrap: wrap;
      justify-content: space-between;
    }

    /* A single panel: shares the row equally and wraps below 300px. */
    .panel {
      box-sizing: border-box;
      flex: 1;
      min-width: 300px;
      margin: 5px;
      padding: 10px;
      border: 1px solid #dddddd;
    }

    /* Every text label in the figure is centred. */
    .panel h2,
    .caption,
    .figure-caption {
      text-align: center;
    }

    /* Panel heading. */
    .panel h2 {
      margin-bottom: 10px;
      font-size: 16px;
    }

    /* Diagrams scale with the panel width; the viewBox keeps the aspect ratio. */
    svg {
      width: 100%;
      height: auto;
      margin-bottom: 10px;
      border: 1px solid #eeeeee;
      background: #fafafa;
    }

    /* Per-panel caption shown under each diagram. */
    .caption {
      color: #333333;
      font-size: 14px;
    }

    /* Caption for the figure as a whole. */
    .figure-caption {
      margin-top: 20px;
      font-weight: 700;
    }
  </style>
</head>
<body>
  <div class="figure-container">
    <!-- Panel A: Adaptive Chain-of-Thought Generation.
         Shows an input prompt plus an appended length instruction feeding an
         LLM, which emits a short sequence of chain-of-thought steps. -->
    <div class="panel">
      <h2>Panel A: Adaptive CoT Generation</h2>
      <svg viewBox="0 0 300 200" role="img"
           aria-label="Diagram: an input prompt and an appended 'Think for 1024 tokens' instruction feed an LLM, which emits three chain-of-thought steps">
        <!-- Arrow marker definition. refX sits at the triangle tip (x=9) so the
             tip lands exactly on the end of the line. With refX=0 the whole
             head, scaled by the 1.5 stroke width, was drawn past the line end
             and inside the target shape. -->
        <defs>
          <marker id="arrow" markerWidth="10" markerHeight="10" refX="9" refY="3" orient="auto">
            <path d="M0,0 L0,6 L9,3 z" fill="#333"/>
          </marker>
        </defs>
        <!-- Input prompt box -->
        <rect x="10" y="10" width="80" height="40" fill="#e0e0e0" stroke="#333"/>
        <text x="50" y="35" font-size="10" text-anchor="middle" fill="#333">Input</text>
        <!-- Appended instruction box. The label is split over two lines because
             the full string is wider than the 80-unit box at this font size. -->
        <rect x="10" y="60" width="80" height="40" fill="#c0e0ff" stroke="#333"/>
        <text font-size="10" text-anchor="middle" fill="#333">
          <tspan x="50" y="77">Think for</tspan>
          <tspan x="50" y="90">1024 tokens</tspan>
        </text>
        <!-- Arrow from input to model -->
        <line x1="90" y1="30" x2="130" y2="30" stroke="#333" stroke-width="1.5" marker-end="url(#arrow)"/>
        <!-- Arrow from the appended instruction to model, so the instruction is
             visibly part of what the model is conditioned on -->
        <line x1="90" y1="80" x2="130" y2="80" stroke="#333" stroke-width="1.5" marker-end="url(#arrow)"/>
        <!-- Model block -->
        <rect x="130" y="10" width="60" height="80" fill="#ffe0b2" stroke="#333"/>
        <text x="160" y="50" font-size="10" text-anchor="middle" fill="#333">LLM</text>
        <!-- Arrow from model to CoT bubbles -->
        <line x1="190" y1="50" x2="230" y2="50" stroke="#333" stroke-width="1.5" marker-end="url(#arrow)"/>
        <!-- Chain-of-Thought bubbles, numbered in generation order -->
        <circle cx="250" cy="30" r="10" fill="#dcedc8" stroke="#333"/>
        <text x="250" y="35" font-size="8" text-anchor="middle" fill="#333">1</text>
        <circle cx="270" cy="50" r="10" fill="#dcedc8" stroke="#333"/>
        <text x="270" y="55" font-size="8" text-anchor="middle" fill="#333">2</text>
        <circle cx="250" cy="70" r="10" fill="#dcedc8" stroke="#333"/>
        <text x="250" y="75" font-size="8" text-anchor="middle" fill="#333">3</text>
      </svg>
      <div class="caption">Conditioned input leads to adaptive chain-of-thought generation</div>
    </div>

    <!-- Panel B: RL-based Length Controlled Policy Optimization.
         Shows the training loop: the LLM produces an output, the output feeds
         an RL update, and the update loops back into the LLM. The reward
         formula is printed underneath the loop. -->
    <div class="panel">
      <h2>Panel B: RL-based Length Control</h2>
      <svg viewBox="0 0 300 200" role="img"
           aria-label="Diagram: the LLM produces an output that feeds an RL update, which loops back to the LLM; reward r = I(correct) - α|n_gold - n|">
        <!-- Arrow marker definition. refX sits at the triangle tip (x=9) so the
             tip lands exactly on the end of the line instead of overshooting
             into the target box. -->
        <defs>
          <marker id="arrow2" markerWidth="10" markerHeight="10" refX="9" refY="3" orient="auto">
            <path d="M0,0 L0,6 L9,3 z" fill="#333"/>
          </marker>
        </defs>
        <!-- LLM Block -->
        <rect x="20" y="20" width="80" height="40" fill="#d1c4e9" stroke="#333"/>
        <text x="60" y="45" font-size="10" text-anchor="middle" fill="#333">LLM</text>
        <!-- Output Block -->
        <rect x="20" y="80" width="80" height="40" fill="#c8e6c9" stroke="#333"/>
        <text x="60" y="105" font-size="10" text-anchor="middle" fill="#333">Output</text>
        <!-- Arrow from LLM to Output -->
        <line x1="60" y1="60" x2="60" y2="80" stroke="#333" stroke-width="1.5" marker-end="url(#arrow2)"/>
        <!-- RL Update Block -->
        <rect x="200" y="80" width="60" height="40" fill="#ffccbc" stroke="#333"/>
        <text x="230" y="105" font-size="10" text-anchor="middle" fill="#333">RL Update</text>
        <!-- Arrow from Output to RL Update -->
        <line x1="100" y1="100" x2="200" y2="100" stroke="#333" stroke-width="1.5" marker-end="url(#arrow2)"/>
        <!-- Feedback loop from RL Update back to LLM. Drawn as one polyline so
             only the final segment carries an arrowhead; it leaves the top of
             the RL Update box and enters the right edge of the LLM box. -->
        <polyline points="230,80 230,40 100,40" fill="none" stroke="#333" stroke-width="1.5" marker-end="url(#arrow2)"/>
        <!-- Reward Function Annotation. Centred below the loop so the formula
             does not run across the feedback line or past the viewBox edge. -->
        <text x="150" y="140" font-size="10" text-anchor="middle" fill="#333">r = I(correct) - α|n_gold - n|</text>
      </svg>
      <div class="caption">The RL loop optimizes for both accuracy and precise length adherence</div>
    </div>

    <!-- Panel C: Empirical Performance and Efficiency Gains.
         A line chart of accuracy against token budget, plus a two-bar inset
         comparing a baseline with the proposed method. -->
    <div class="panel">
      <h2>Panel C: Empirical Performance &amp; Efficiency</h2>
      <svg viewBox="0 0 300 200" role="img"
           aria-label="Line chart of accuracy rising with token budget, with an inset bar chart comparing Baseline and LCPO">
        <!-- Axes: x along the bottom, y up the left side -->
        <line x1="30" y1="170" x2="270" y2="170" stroke="#333" stroke-width="1"/>
        <line x1="30" y1="170" x2="30" y2="20" stroke="#333" stroke-width="1"/>
        <!-- Performance curve (illustrative points, not measured data) -->
        <polyline fill="none" stroke="#64b5f6" stroke-width="2"
                  points="30,170 70,140 110,110 150,90 190,80 230,75 270,70"/>
        <text x="150" y="190" font-size="10" text-anchor="middle" fill="#333">Token Budget →</text>
        <text x="15" y="95" font-size="10" text-anchor="middle" fill="#333" transform="rotate(-90,15,95)">Accuracy</text>
        <!-- Simple Bar Chart Inset. The bars are spaced apart and each label is
             centred under its own bar; previously both labels shared one
             baseline 10 units apart and were drawn on top of each other.
             NOTE(review): the quantity the bars measure is not stated in the
             figure; presumably tokens used, given the panel caption. Confirm
             and add an inset title if so. -->
        <rect x="200" y="120" width="15" height="30" fill="#aed581" stroke="#333"/>
        <rect x="235" y="130" width="15" height="20" fill="#aed581" stroke="#333"/>
        <text x="207.5" y="160" font-size="8" text-anchor="middle" fill="#333">Baseline</text>
        <text x="242.5" y="160" font-size="8" text-anchor="middle" fill="#333">LCPO</text>
      </svg>
      <div class="caption">Higher accuracy at reduced token budgets compared to baselines</div>
    </div>
  </div>
  <div class="figure-caption">
    Figure 1. Length Controlled Policy Optimization (LCPO): A visual overview from input conditioning, through RL-based optimization, to empirical performance gains.
  </div>
</body>
</html>