#!/usr/bin/env python3
from transformers import AutoTokenizer

# Tokenizer inspection script. Two checkpoints are referenced, but only the
# first is active; the second is kept commented out below.
llama_tokenizer = AutoTokenizer.from_pretrained("OpenDFM/ChemDFM-v1.5-8B")
# qwen_tokenizer = AutoTokenizer.from_pretrained("AI4Chem/ChemLLM-7B-Chat-1_5-DPO")

# Show the special tokens the tokenizer reports, BOS first and then EOS.
for special_token_attr in ("bos_token", "eos_token"):
    print(getattr(llama_tokenizer, special_token_attr))

# A short user/assistant/user exchange; only the first turn is used by the
# active code below, the full list is used by the commented-out template call.
test_messages = [
    {"role": "user", "content": "What is a molecule?"},
    {
        "role": "assistant",
        "content": "A molecule is a group of atoms bonded together.",
    },
    {"role": "user", "content": "What is a molecule?"},
]

print("=== LLAMA TEMPLATE ===")
# NOTE(review): despite the banner, no chat template is applied here. The
# first message's text is encoded to token ids and decoded straight back,
# presumably to see which special tokens encode() adds — confirm intent.
first_turn_text = test_messages[0]["content"]
first_turn_ids = llama_tokenizer.encode(first_turn_text)
llama_result = llama_tokenizer.decode(first_turn_ids)
print(llama_result)

# Disabled: render the whole conversation with the second tokenizer's chat template.
# print("\n=== QWEN TEMPLATE ===")
# qwen_result = qwen_tokenizer.apply_chat_template(test_messages, tokenize=False, add_generation_prompt=False, enable_thinking=False)
# print(qwen_result)