{% extends 'base.html' %}
{% load static %}

{% block title %}FAQ - IMProofBench{% endblock %}

{% block extra_css %}
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap-icons@1.10.0/font/bootstrap-icons.css">
<style>
    /* Spacing between one topical group of questions and the next. */
    .faq-section {
        margin-bottom: 3rem;
    }

    /* Card that wraps a single question together with its answer. */
    .faq-item {
        overflow: hidden;
        margin-bottom: 2rem;
        border-left: 4px solid #369;
        border-radius: .5rem;
        background-color: #f8f9fa;
    }

    /* Clickable question row: label on the left, chevron pushed to the right. */
    .faq-question {
        display: flex;
        align-items: center;
        justify-content: space-between;
        margin: 0;
        padding: 1.5rem;
        color: #369;
        font-size: 1.1rem;
        font-weight: 600;
        cursor: pointer;
        transition: background-color .2s;
    }
    .faq-question:hover {
        background-color: #e9ecef;
    }

    /* Answers are collapsed by default; the "show" class reveals them. */
    .faq-answer {
        display: none;
        padding: 0 1.5rem 1.5rem;
        color: #495057;
        line-height: 1.6;
    }
    .faq-answer.show {
        display: block;
    }

    /* Chevron flips upside down while the "rotated" class is present. */
    .faq-toggle {
        transition: transform .2s;
    }
    .faq-toggle.rotated {
        transform: rotate(180deg);
    }

    /* Bullet lists inside an answer. */
    .faq-answer ul {
        margin-top: .5rem;
        padding-left: 1.5rem;
    }
    .faq-answer ul li {
        margin-bottom: .5rem;
    }

    /* Gradient banner that introduces each group of questions. */
    .section-header {
        margin-bottom: 1.5rem;
        padding: 1rem 1.5rem;
        border-radius: .5rem;
        background: linear-gradient(135deg, #369 0%, #4a7ba7 100%);
        color: #fff;
    }
    .section-header h4 {
        margin: 0;
        font-weight: 600;
    }
    .section-header p {
        margin: .5rem 0 0;
        font-size: .95rem;
        opacity: .9;
    }
</style>
{% endblock %}

{% block page_header %}
<!-- Centered page heading: the page's h1 followed by a one-line tagline. -->
<div class="text-center mb-5">
    <h1 class="display-4 fw-bold text-primary">Frequently Asked Questions</h1>
    <p class="lead text-muted">Common questions about IMProofBench and how to contribute</p>
</div>
{% endblock %}

{% block content %}

<!-- About the Benchmark Section -->
<!--
    Each .faq-item pairs a clickable .faq-question with the .faq-answer that
    immediately follows it; toggleFAQ() reads the answer via nextElementSibling,
    so that sibling order must be kept. Icons are decorative and hidden from
    assistive technology. Links that open a new tab carry rel="noopener noreferrer".
-->
<div class="faq-section">
    <div class="section-header">
        <h4><i class="bi bi-info-circle me-2" aria-hidden="true"></i>About the Benchmark</h4>
        <p>Understanding what IMProofBench is and how it works</p>
    </div>

    <div class="faq-item">
        <div class="faq-question" onclick="toggleFAQ(this)">
            What's the goal of the IMProofBench project?
            <i class="bi bi-chevron-down faq-toggle" aria-hidden="true"></i>
        </div>
        <div class="faq-answer">
            IMProofBench aims to track progress of AI on mathematical reasoning as it appears in research-level problems. We stay close to the open-ended nature of problems from modern mathematics research, provide equitable access to all AI companies for evaluation, and maintain private questions to avoid overfitting and benchmark gaming.
        </div>
    </div>

    <div class="faq-item">
        <div class="faq-question" onclick="toggleFAQ(this)">
            How does IMProofBench differ from existing benchmarks?
            <i class="bi bi-chevron-down faq-toggle" aria-hidden="true"></i>
        </div>
        <div class="faq-answer">
            IMProofBench focuses on <strong>proof generation</strong> at <strong>research level</strong> rather than just finding correct answers. Here's how we differ from existing approaches:
            <ul>
                <li><strong><a href="https://github.com/hendrycks/math" target="_blank" rel="noopener noreferrer">MATH</a> and <a href="https://frontiermath.org/" target="_blank" rel="noopener noreferrer">FrontierMath</a>:</strong> Focus on unique numerical answers, which can sometimes be reached through shortcuts without genuine understanding</li>
                <li><strong><a href="https://github.com/openai/miniF2F" target="_blank" rel="noopener noreferrer">miniF2F</a>:</strong> Requires formalized mathematics as output, limiting scope to problems that can be reasonably formalized</li>
                <li><strong>Recent proof-focused work</strong> like <a href="https://arxiv.org/abs/2503.21934" target="_blank" rel="noopener noreferrer">Math Olympiad evaluation</a> and <a href="https://arxiv.org/abs/2505.23281" target="_blank" rel="noopener noreferrer">MathArena</a>: Focus on high-school level olympiad mathematics rather than research-level problems</li>
            </ul>
            Our approach evaluates AI systems on their ability to produce complete, rigorous mathematical arguments that would satisfy peer-review standards at the graduate/research level, directly targeting AI weaknesses like hallucination while remaining authentic to modern research practice.
        </div>
    </div>

    <div class="faq-item">
        <div class="faq-question" onclick="toggleFAQ(this)">
            Which AI models are tested against the benchmark?
            <i class="bi bi-chevron-down faq-toggle" aria-hidden="true"></i>
        </div>
        <div class="faq-answer">
            We test frontier AI models including GPT-5, Grok 4, Gemini 2.5 Pro, Claude Opus 4.1, and other state-of-the-art reasoning systems. Models are evaluated in a multi-turn environment with access to advanced tools like SageMath and online search to simulate realistic research conditions. The specific models tested may evolve as new systems become available.
        </div>
    </div>

    <div class="faq-item">
        <div class="faq-question" onclick="toggleFAQ(this)">
            How are the AI-generated proofs graded?
            <i class="bi bi-chevron-down faq-toggle" aria-hidden="true"></i>
        </div>
        <div class="faq-answer">
            TBD - still in flux on our design side.
        </div>
    </div>
</div>

<!-- Privacy & Data Protection Section -->
<!--
    Same structure as the other FAQ groups: every .faq-question is directly
    followed by its .faq-answer, which toggleFAQ() shows or hides. The ampersand
    in the visible heading is written as an entity; icons are decorative.
-->
<div class="faq-section">
    <div class="section-header">
        <h4><i class="bi bi-shield-lock me-2" aria-hidden="true"></i>Privacy &amp; Data Protection</h4>
        <p>How we protect your contributed problems</p>
    </div>

    <div class="faq-item">
        <div class="faq-question" onclick="toggleFAQ(this)">
            Why is most of the dataset kept private?
            <i class="bi bi-chevron-down faq-toggle" aria-hidden="true"></i>
        </div>
        <div class="faq-answer">
            We maintain benchmark integrity by preventing AI companies from training on test problems or optimizing specifically against our benchmark. As Goodhart's law states: "When a measure becomes a target, it ceases to be a good measure." A private dataset ensures unbiased evaluation and reliable capability measurement over time.
        </div>
    </div>

    <div class="faq-item">
        <div class="faq-question" onclick="toggleFAQ(this)">
            How do you prevent AI companies from using submitted problems for training?
            <i class="bi bi-chevron-down faq-toggle" aria-hidden="true"></i>
        </div>
        <div class="faq-answer">
            Models are tested via API calls, and standard legal terms of major AI providers already include non-training policies. We plan to solicit additional zero data retention policies with the different providers once we start evaluating questions at scale.
        </div>
    </div>
</div>

<!-- Contribution & Testing Section -->
<!--
    Same structure as the other FAQ groups: every .faq-question is directly
    followed by its .faq-answer, which toggleFAQ() shows or hides. The ampersand
    in the visible heading is written as an entity; icons are decorative.
-->
<div class="faq-section">
    <div class="section-header">
        <h4><i class="bi bi-people me-2" aria-hidden="true"></i>Contribution &amp; Testing</h4>
        <p>Benefits and process for contributors</p>
    </div>

    <div class="faq-item">
        <div class="faq-question" onclick="toggleFAQ(this)">
            Will I get co-authorship credit for contributing problems?
            <i class="bi bi-chevron-down faq-toggle" aria-hidden="true"></i>
        </div>
        <div class="faq-answer">
            Yes! All contributors have the option to become co-authors on resulting publications. Author order may be tied to our contribution tracking system. Depending on the project's scale, multiple publications may emerge covering different aspects (grading systems, AI mistake analysis, etc.), providing multiple co-authorship opportunities.
        </div>
    </div>

    <div class="faq-item">
        <div class="faq-question" onclick="toggleFAQ(this)">
            Can I test my own problems against AI models before submitting?
            <i class="bi bi-chevron-down faq-toggle" aria-hidden="true"></i>
        </div>
        <div class="faq-answer">
            Yes! Currently a single model is available for such testing (GPT-5 with high reasoning effort and access to web search and a code interpreter). This is one of the most advanced models available for mathematical reasoning.
        </div>
    </div>

    <div class="faq-item">
        <div class="faq-question" onclick="toggleFAQ(this)">
            Can I submit a problem where I currently don't know the answer?
            <i class="bi bi-chevron-down faq-toggle" aria-hidden="true"></i>
        </div>
        <div class="faq-answer">
            Yes, add the tag "open problem". Please do this if you are confident that you could validate and recognize a correct answer. For these it will be particularly important to have you as a question author also willing to contribute to grading the AI answers.
        </div>
    </div>
</div>

<!-- Project Vision Section -->
<!--
    Same structure as the other FAQ groups: every .faq-question is directly
    followed by its .faq-answer, which toggleFAQ() shows or hides.
    The header uses the "bullseye" glyph; NOTE(review): "bi-target" does not
    appear to exist in Bootstrap Icons, so confirm against the icon list.
    Icons are decorative; new-tab links carry rel="noopener noreferrer".
-->
<div class="faq-section">
    <div class="section-header">
        <h4><i class="bi bi-bullseye me-2" aria-hidden="true"></i>Project Vision</h4>
        <p>Goals, timeline, and broader impact</p>
    </div>

    <div class="faq-item">
        <div class="faq-question" onclick="toggleFAQ(this)">
            What are the goals of the pilot project?
            <i class="bi bi-chevron-down faq-toggle" aria-hidden="true"></i>
        </div>
        <div class="faq-answer">
            Our summer 2025 pilot aims to:
            <ul>
                <li><strong>Collect 25-50 high-quality problems</strong> by end of August 2025</li>
                <li><strong>Test against frontier AI models</strong> to establish baseline capabilities</li>
                <li><strong>Submit a proof-of-concept paper</strong> to <a href="https://iclr.cc/" target="_blank" rel="noopener noreferrer">ICLR 2026</a></li>
                <li><strong>Focus initially on algebraic geometry</strong> and related fields</li>
                <li><strong>Validate our evaluation methodology</strong> and refine the contribution process</li>
            </ul>
        </div>
    </div>

    <div class="faq-item">
        <div class="faq-question" onclick="toggleFAQ(this)">
            When will benchmark results be published?
            <i class="bi bi-chevron-down faq-toggle" aria-hidden="true"></i>
        </div>
        <div class="faq-answer">
            We plan to finalize the pilot phase by September 2025 and submit initial results to <a href="https://iclr.cc/" target="_blank" rel="noopener noreferrer">ICLR 2026</a>. Results will include both quantitative performance metrics and qualitative analysis of AI strengths and weaknesses in mathematical reasoning. Ongoing results may be shared as the project scales beyond the pilot.
        </div>
    </div>

    <div class="faq-item">
        <div class="faq-question" onclick="toggleFAQ(this)">
            How will this benchmark help/harm AI development?
            <i class="bi bi-chevron-down faq-toggle" aria-hidden="true"></i>
        </div>
        <div class="faq-answer">
            IMProofBench is designed to <strong>measure progress without accelerating capabilities</strong>. By maintaining a private dataset and focusing on evaluation rather than providing training data, we aim to offer unbiased measurement of AI mathematical reasoning without contributing to potentially concerning capability advances. Our focus is on understanding current limitations rather than providing optimization targets.
        </div>
    </div>

    <div class="faq-item">
        <div class="faq-question" onclick="toggleFAQ(this)">
            Is this connected to any specific research institution or company?
            <i class="bi bi-chevron-down faq-toggle" aria-hidden="true"></i>
        </div>
        <div class="faq-answer">
            IMProofBench is an academic project led by researchers at <redacted> and <redacted>. We maintain independence from AI companies while seeking future collaborations with academic research institutions for community outreach and support. The project aims to serve the broader mathematical research community rather than any specific commercial interest.
        </div>
    </div>
</div>

<!-- Contact Section -->
<!--
    Call-to-action card linking to the project's Zulip chat. The link opens in
    a new tab, so it carries rel="noopener noreferrer"; the icon is decorative.
-->
<div class="row mt-5">
    <div class="col-12">
        <div class="card border-0 bg-primary text-white">
            <div class="card-body p-4 text-center">
                <h4 class="mb-3">Have More Questions?</h4>
                <p class="mb-4">Join our community discussion or contact the project team directly.</p>
                <div class="d-flex justify-content-center gap-3">
                    <a href="https://improofbench.zulipchat.com/join/oclqw6wstanvip4t7agk66kj/" target="_blank" rel="noopener noreferrer" class="btn btn-light">
                        <i class="bi bi-chat-dots me-2" aria-hidden="true"></i>Join Zulip Chat
                    </a>
                </div>
            </div>
        </div>
    </div>
</div>

<script>
/**
 * Expand or collapse one FAQ entry.
 *
 * Toggles the "show" class on the element that immediately follows the
 * question (its answer panel), keeps the chevron's "rotated" class in step
 * with the panel, and mirrors the state in the question's aria-expanded
 * attribute.
 *
 * @param {Element} element The question element that was activated; the
 *     inline onclick handlers on .faq-question pass `this`.
 * @returns {void} Does nothing if the question or its answer is missing.
 */
function toggleFAQ(element) {
    if (!element) {
        return;
    }

    const answer = element.nextElementSibling;
    if (!answer) {
        return;
    }

    // classList.toggle returns the resulting state, so the panel is the
    // single source of truth and the icon can never drift out of sync.
    const expanded = answer.classList.toggle('show');

    const toggle = element.querySelector('.faq-toggle');
    if (toggle) {
        toggle.classList.toggle('rotated', expanded);
    }

    element.setAttribute('aria-expanded', String(expanded));
}

// Keyboard and screen-reader support for the click-only question rows.
// The guard keeps this file loadable where no DOM exists (e.g. unit tests).
if (typeof document !== 'undefined') {
    document.querySelectorAll('.faq-question').forEach(function (question) {
        // Natively interactive elements already handle focus and Enter/Space.
        if (question.matches('button, a[href], summary')) {
            return;
        }

        const answer = question.nextElementSibling;
        const isOpen = Boolean(answer && answer.classList.contains('show'));

        question.setAttribute('role', 'button');
        question.setAttribute('tabindex', '0');
        question.setAttribute('aria-expanded', String(isOpen));

        question.addEventListener('keydown', function (event) {
            if (event.target !== question) {
                return;
            }
            if (event.key === 'Enter' || event.key === ' ') {
                // Stop Space from scrolling the page.
                event.preventDefault();
                toggleFAQ(question);
            }
        });
    });
}
</script>

{% endblock %}
