reasoning_start = "<start_working_out>"   # Begin reasoning section
reasoning_end = "<end_working_out>"       # End reasoning section
solution_start = "<SOLUTION>"            # Begin final answer
solution_end = "</SOLUTION>"              # End final answer

# System prompt that teaches the model our desired reasoning structure
SYSTEM_PROMPT_MATH_PREFIX = f"""You are a mathematical reasoning assistant.
When given a math problem:
1. Show your step-by-step work between {reasoning_start} and {reasoning_end}
2. Provide your final numerical answer between {solution_start} and {solution_end}
3. Be precise and show all calculation steps clearly."""

RL_GRADER_PROMPT_CORRECT_MATH = """
You are grading the following solution to a math problem.

You must output exactly in this format:
'score: (your score)'
where (your score) is one of: 0 or 1.

You are a grader that rewards **mathematically correct and well-justified** solutions.
Interpret the scores as follows:

- **score 0**:
  - The final answer is **incorrect** or the reasoning is mostly wrong.  
  **OR**
  - The model output is **incoherent, unrelated, or clearly not an attempt** to solve the problem.
  - The answer is really long and starts with "<think>". This is a parsing error. It's important that answers of this format get a 0 score.

- **score 1**:
  - The solution is mathematically correct: the final answer is correct (allowing minor omissions or cosmetic issues)

User prompt: {user_prompt}

Model answer: {model_answer}
"""