import os
import argparse
from transformers import AutoTokenizer, AutoModelForCausalLM

def main():
    """Add the <|im_start|> and <|im_end|> special tokens to a checkpoint.

    Reads one positional command-line argument, ``pretrained_path``, and
    loads the tokenizer from it. If both tokens are already among the
    tokenizer's special tokens, a message is printed and nothing is written.
    Otherwise the missing tokens are registered as additional special
    tokens, the model is loaded and its embedding matrix is grown if it has
    fewer rows than the tokenizer has tokens, and both are saved to
    ``<pretrained_path>_im``.

    Returns:
        None.
    """
    parser = argparse.ArgumentParser(
        description="Add special tokens <|im_start|> and <|im_end|> to a Hugging Face model and tokenizer if missing."
    )
    parser.add_argument("pretrained_path", help="Path to the pretrained model and tokenizer")
    args = parser.parse_args()

    pretrained_path = args.pretrained_path
    # Strip trailing path separators so "model/" maps to "model_im" rather
    # than "model/_im" (a directory nested inside the source checkpoint).
    # Fall back to the raw value if stripping would leave nothing (e.g. "/").
    separators = os.sep + (os.altsep or "")
    new_path = (pretrained_path.rstrip(separators) or pretrained_path) + "_im"

    # Only the tokenizer is needed to decide whether any work is required,
    # so the (much heavier) model load is deferred until we know it is.
    tokenizer = AutoTokenizer.from_pretrained(pretrained_path)

    # Determine which tokens need to be added.
    present = set(tokenizer.all_special_tokens)
    tokens_to_add = [
        token for token in ("<|im_start|>", "<|im_end|>") if token not in present
    ]
    if not tokens_to_add:
        print("Special tokens already present. No changes made.")
        return

    print("Adding special tokens:", tokens_to_add)

    # Passing "additional_special_tokens" replaces the tokenizer's current
    # list, so merge with what is already registered to avoid silently
    # demoting previously added special tokens.
    already_registered = list(getattr(tokenizer, "additional_special_tokens", None) or [])
    tokenizer.add_special_tokens(
        {"additional_special_tokens": already_registered + tokens_to_add}
    )

    model = AutoModelForCausalLM.from_pretrained(pretrained_path)
    # Grow the embedding matrix only when it is too small. Some checkpoints
    # pad the vocabulary dimension beyond len(tokenizer); resizing down to
    # len(tokenizer) would truncate those rows for no benefit.
    if len(tokenizer) > model.get_input_embeddings().weight.shape[0]:
        model.resize_token_embeddings(len(tokenizer))

    # Save the updated model and tokenizer to the new directory.
    os.makedirs(new_path, exist_ok=True)
    model.save_pretrained(new_path)
    tokenizer.save_pretrained(new_path)

    print(f"Updated model and tokenizer saved to {new_path}")

# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
