import sys
import os
import json
import random
import time
import re

import torch


def get_ppo_prompt_format(args, tokenizer):
    """Select the PPO prompt template and EOS token for ``args.model_arch``.

    Args:
        args: Object with a ``model_arch`` attribute naming the architecture.
        tokenizer: Wrapper whose ``_tokenizer.eos_token`` is returned as the
            EOS token. It is not accessed when ``model_arch`` is ``"yi_9b"``.

    Returns:
        A ``(prompt_format, eos_token)`` tuple. ``prompt_format`` is a
        template string containing an ``{input}`` placeholder
        (``"qwen2-72b"`` and ``"yi_9b"``) or a ``{problem}`` placeholder
        (every other architecture, including unrecognised ones).
        ``eos_token`` is ``None`` for ``"yi_9b"`` and
        ``tokenizer._tokenizer.eos_token`` otherwise.
    """
    arch = args.model_arch

    if arch == "yi_9b":
        # The only architecture that reports no EOS token; the tokenizer is
        # deliberately left untouched on this path.
        return "<|im_start|>user\n{input}<|im_end|>\n<|im_start|>assistant\n", None

    if arch == "qwen2-72b":
        prompt_format = "<|im_start|>system\nyou are a helpful assistant<|im_end|>\n<|im_start|>user\n{input}<|im_end|>\n<|im_start|>assistant\n"
    elif arch in ("qwen2.5-math-rm-72b", "qwen2.5-math-1.5b"):
        # NOTE(review): the literal "\boxed{}" contains a bare "{}". If callers
        # fill this template with str.format(problem=...), that "{}" is parsed
        # as a positional field and raises IndexError; it would need to be
        # "{{}}". Left unchanged because the substitution method used by
        # callers is not visible from here - confirm before changing.
        prompt_format = "<|im_start|>system\nPlease reason step by step, and put your final answer within \\boxed{}.<|im_end|>\n<|im_start|>user\n{problem}<|im_end|>\n<|im_start|>assistant\n"
    elif arch in ("qwen3", "qwen3-moe"):
        # Print the reminder once per job. get_rank() raises when no default
        # process group exists, so only consult it when one has been set up;
        # a non-distributed process is treated as the printing rank.
        dist = torch.distributed
        distributed_ready = dist.is_available() and dist.is_initialized()
        if not distributed_ready or dist.get_rank() == 0:
            print("define your own prompt_format according to your task")
        prompt_format = "{problem}"
    elif arch == "llama":
        prompt_format = "{problem}"
    else:
        # Fallback template for any architecture not listed above.
        prompt_format = "###{problem}\n### Response:\n"

    return prompt_format, tokenizer._tokenizer.eos_token


def get_gen_rm_prompt_format(args):
    """Return the prompt template for the generative reward model grader.

    The template opens with a grading instruction (the first turn carries no
    role tag after ``<|im_start|>``), followed by a user turn holding a single
    positional ``{}`` placeholder and an open assistant turn.

    Args:
        args: Accepted for interface compatibility; not read by this function.

    Returns:
        The template string with one positional ``{}`` placeholder.
    """
    grader_instruction = (
        "You are a math teacher. Grade the Solution, verifying correctness step by step. "
        "Use Expected Answer to find any erroneous step in the Solution. "
        "At the end of the Solution verification, when you give your final grade, "
        'write it in the form "Verification: Is the answer correct (Yes/No)? X",  '
        "where X is either Yes or No."
    )
    return (
        "<|im_start|>" + grader_instruction + "<|im_end|>\n"
        "<|im_start|>user\n{}<|im_end|>\n"
        "<|im_start|>assistant\n"
    )
