#!/usr/bin/env python3
"""
在运行时设置tokenizer的chat_template
"""

# Fallback template using a plain "### Human: / ### Assistant: " layout.
#
# The BOS token is emitted on its own, and only when one is actually defined
# and non-empty, so that (a) a tokenizer without a BOS token does not break
# rendering and (b) the first message still goes through the per-role
# formatting below instead of being emitted as raw content.
#
# NOTE(review): the layout assumes the template is rendered with Jinja's
# trim_blocks/lstrip_blocks enabled (believed to be what transformers does);
# under those settings the newline after each `{{ ... }}` line is kept in the
# rendered text - confirm against the installed transformers version.
_DEFAULT_CHAT_TEMPLATE = """{% for message in messages %}
{% if loop.first and message['role'] != 'system' and bos_token is defined and bos_token %}{{ bos_token }}{% endif %}
{% if message['role'] == 'system' %}
{{ message['content'] }}
{% elif message['role'] == 'user' %}
{{ '### Human: ' + message['content'] + '\n\n### Assistant: ' }}
{% elif message['role'] == 'assistant' %}
{{ message['content'] + eos_token }}
{% endif %}
{% endfor %}"""


def setup_chat_template(tokenizer):
    """Install a fallback chat template on ``tokenizer`` if it has none.

    The tokenizer is modified in place: when its ``chat_template`` attribute
    is missing or ``None``, it is set to the module's fallback template and a
    confirmation message is printed. An existing template is never
    overwritten.

    Args:
        tokenizer: Object exposing (or accepting) a ``chat_template``
            attribute, e.g. a Hugging Face tokenizer.

    Returns:
        The same ``tokenizer`` object that was passed in.
    """
    # getattr() keeps this working for objects that lack the attribute
    # entirely, treating them the same as "no template set".
    if getattr(tokenizer, "chat_template", None) is None:
        tokenizer.chat_template = _DEFAULT_CHAT_TEMPLATE
        print("已设置chat_template")

    return tokenizer

# Usage example
if __name__ == "__main__":
    import sys

    from transformers import AutoTokenizer

    # Tokenizer location: the first command-line argument when given,
    # otherwise the original local checkpoint path.
    model_path = (
        sys.argv[1]
        if len(sys.argv) > 1
        else "/home/ma-user/work/lilong/download/save_models/Qwen3-4B-Base"
    )

    # Load the tokenizer.
    tokenizer = AutoTokenizer.from_pretrained(
        model_path,
        trust_remote_code=True,
    )

    # Install the fallback chat template if the tokenizer does not define one.
    tokenizer = setup_chat_template(tokenizer)

    # Smoke test: render a single user turn to text (no tokenization).
    messages = [
        {"role": "user", "content": "Hello, how are you?"},
    ]

    result = tokenizer.apply_chat_template(messages, tokenize=False)
    print(f"结果: {result}")




